From a14372b0951c755ad71434c840f5f8ebd12a8065 Mon Sep 17 00:00:00 2001 From: cage Date: Fri, 21 Jun 2024 18:09:45 +0200 Subject: [PATCH] - prefixed all parsing rule for iri parsing to avoid clash with other rules in the code. --- src/iri-parser.lisp | 141 +++++++++++++++++++++++--------------------- 1 file changed, 75 insertions(+), 66 deletions(-) diff --git a/src/iri-parser.lisp b/src/iri-parser.lisp index 056353a..3f2b817 100644 --- a/src/iri-parser.lisp +++ b/src/iri-parser.lisp @@ -19,46 +19,46 @@ (define-constant +segment-separator+ "/" :test #'string=) -(defrule alpha (character-ranges (#\a #\z) (#\A #\Z)) +(defrule iri-alpha (character-ranges (#\a #\z) (#\A #\Z)) (:text t)) -(defrule digit (character-ranges (#\0 #\9)) +(defrule iri-digit (character-ranges (#\0 #\9)) (:text t)) -(defrule scheme-delim #\: +(defrule iri-scheme-delim #\: (:constant :scheme-delim)) -(defrule iquery-delim #\? +(defrule iri-iquery-delim #\? (:constant :iquery-delim)) -(defrule ifragment-delim #\# +(defrule iri-ifragment-delim #\# (:constant :ifragment-delim)) -(defrule port-delim #\: +(defrule iri-port-delim #\: (:constant :port-delim)) -(defrule credential-delim #\@ +(defrule iri-credential-delim #\@ (:constant :credential-delim)) -(defrule iauthority-start "//" +(defrule iri-iauthority-start "//" (:constant :iauthority-start)) -(defrule sub-delims (or #\! #\$ #\& #\' #\( #\) #\* #\+ #\, #\; #\=) +(defrule iri-sub-delims (or #\! #\$ #\& #\' #\( #\) #\* #\+ #\, #\; #\=) (:text t)) -(defrule gen-delims (or ":" "?" "#" "[" "]" "@" "") +(defrule iri-gen-delims (or ":" "?" "#" "[" "]" "@" "") (:text t)) -(defrule iunreserved-chars (or alpha digit #\- #\. #\_ #\~ ucschar) +(defrule iri-iunreserved-chars (or iri-alpha iri-digit #\- #\. #\_ #\~ iri-ucschar) (:text t)) -(defrule iprivate (or (character-ranges (#\UE000 #\UF8FF)) - (character-ranges (#\UF0000 #\UFFFFD)) - (character-ranges (#\U100000 #\U10FFFD))) +(defrule iri-iprivate (or (character-ranges (#\UE000 #\UF8FF)) + (character-ranges (#\UF0000 #\UFFFFD)) + (character-ranges (#\U100000 #\U10FFFD))) (:text t)) -(defrule ucschar (or (character-ranges (#\UA0 #\UD7FF)) +(defrule iri-ucschar (or (character-ranges (#\UA0 #\UD7FF)) (character-ranges (#\UF900 #\UFDCF)) (character-ranges (#\UFDF0 #\UFFEF)) (character-ranges (#\U10000 #\U1FFFD)) @@ -77,88 +77,94 @@ (character-ranges (#\UE1000 #\UEFFFD))) (:text t)) -(defrule reserved-chars (or gen-delims sub-delims) +(defrule iri-reserved-chars (or iri-gen-delims iri-sub-delims) (:text t)) -(defrule scheme (and alpha (* (or alpha digit "+" "-" "." ))) +(defrule iri-scheme (and iri-alpha (* (or iri-alpha iri-digit "+" "-" "." ))) (:text t)) -(defrule ihier-part (or (and iauthority-start iauthority ipath-abempty) - ipath-absolute ; text - ipath-rootless ;text - ipath-empty)) ;text +(defrule iri-ihier-part (or (and iri-iauthority-start iri-iauthority iri-ipath-abempty) + iri-ipath-absolute ; text + iri-ipath-rootless ;text + iri-ipath-empty)) ;text -(defrule user-credentials (and iuserinfo credential-delim) +(defrule iri-user-credentials (and iri-iuserinfo iri-credential-delim) (:function first)) -(defrule port-block (and port-delim port) +(defrule iri-port-block (and iri-port-delim iri-port) (:function second) (:function parse-integer)) -(defrule iauthority (and (? user-credentials) - ihost - (? port-block))) +(defrule iri-iauthority (and (? iri-user-credentials) + iri-ihost + (? iri-port-block))) -(defrule ireg-name (* (or iunreserved-chars pct-encoded sub-delims )) +(defrule iri-ireg-name (* (or iri-iunreserved-chars iri-pct-encoded iri-sub-delims )) (:text t)) -(defrule ihost (or ipv4-address ip-literal ireg-name) +(defrule iri-ihost (or iri-ipv4-address iri-ip-literal iri-ireg-name) (:text t)) -(defrule port (+ digit) +(defrule iri-port (+ iri-digit) (:text t)) -(defrule iuserinfo (* (or iunreserved-chars pct-encoded sub-delims ":" )) +(defrule iri-iuserinfo (* (or iri-iunreserved-chars iri-pct-encoded iri-sub-delims ":" )) (:text t)) -(defrule pct-encoded (and "%" hexdig hexdig) +(defrule iri-pct-encoded (and "%" iri-hexdig iri-hexdig) (:text t)) -(defrule hexdig (or (character-ranges (#\a #\f)) - (character-ranges (#\A #\F)) - digit) +(defrule iri-hexdig (or (character-ranges (#\a #\f)) + (character-ranges (#\A #\F)) + iri-digit) (:text t)) -(defrule ipv4-address (and dec-octet "." dec-octet "." dec-octet "." dec-octet) +(defrule iri-ipv4-address (and iri-dec-octet "." + iri-dec-octet "." + iri-dec-octet "." + iri-dec-octet) (:text t)) -(defrule ip-literal (and "[" +(defrule iri-ip-literal (and "[" (+ (not (or "[" "]"))) "]") (:function (lambda (a) (text (second a))))) -(defrule ipchar (or iunreserved-chars pct-encoded sub-delims ":" "@") +(defrule iri-ipchar (or iri-iunreserved-chars iri-pct-encoded iri-sub-delims ":" "@") (:text t)) -(defrule isegment (* ipchar) +(defrule iri-isegment (* iri-ipchar) (:text t)) -(defrule isegment-non-zero (+ ipchar) +(defrule iri-isegment-non-zero (+ iri-ipchar) (:text t)) -(defrule isegment-nz-nc (+ (or iunreserved-chars pct-encoded sub-delims "@" )) +(defrule iri-isegment-nz-nc (+ (or iri-iunreserved-chars + iri-pct-encoded + iri-sub-delims "@" )) (:text t)) -(defrule ipath-abempty (* (and "/" isegment)) +(defrule iri-ipath-abempty (* (and "/" iri-isegment)) (:text t)) -(defrule ipath (or ipath-abempty - ipath-absolute - ipath-noscheme - ipath-rootless - ipath-empty) +(defrule iri-ipath (or iri-ipath-abempty + iri-ipath-absolute + iri-ipath-noscheme + iri-ipath-rootless + iri-ipath-empty) (:text t)) -(defrule ipath-absolute (and "/" (? (and isegment-non-zero (* (and "/" isegment ))))) +(defrule iri-ipath-absolute (and "/" (? (and iri-isegment-non-zero (* (and "/" + iri-isegment ))))) (:text t)) -(defrule ipath-rootless (and isegment-non-zero (* (and "/" isegment ))) +(defrule iri-ipath-rootless (and iri-isegment-non-zero (* (and "/" iri-isegment ))) (:text t)) -(defrule ipath-noscheme (and isegment-nz-nc (* (and "/" isegment ))) +(defrule iri-ipath-noscheme (and iri-isegment-nz-nc (* (and "/" iri-isegment ))) (:text t)) -(defrule ipath-empty "" +(defrule iri-ipath-empty "" (:constant nil)) (defun octect-p (maybe-octect) @@ -167,7 +173,7 @@ (when (<= 0 number 255) number)))) -(defrule dec-octet (octect-p (+ digit)) +(defrule iri-dec-octet (octect-p (+ digit)) (:text t)) (defun extract-fields-from-absolute-iri (parsed) @@ -191,18 +197,18 @@ iquery ifragment))) -(defrule iri (and scheme ":" - ihier-part - (? iquery) - (? ifragment)) +(defrule iri-iri (and iri-scheme ":" + iri-ihier-part + (? iri-iquery) + (? iri-ifragment)) (:function extract-fields-from-absolute-iri)) -(defrule irelative-part (or (and iauthority-start - iauthority - ipath-abempty) - ipath-absolute - ipath-noscheme - ipath-empty)) +(defrule iri-irelative-part (or (and iri-iauthority-start + iri-iauthority + iri-ipath-abempty) + iri-ipath-absolute + iri-ipath-noscheme + iri-ipath-empty)) (defun extract-fields-from-relative-iri-w-authority (parsed) ;; ((:IAUTHORITY-START (NIL "bar.baz" NIL) "/foo.gmi") "a=b" "afrag") @@ -230,18 +236,21 @@ (extract-fields-from-relative-iri-w-authority parsed) (extract-fields-from-relative-iri-w/o-authority parsed))) -(defrule irelative-ref (and irelative-part (? iquery) (? ifragment)) +(defrule iri-irelative-ref (and iri-irelative-part (? iri-iquery) (? iri-ifragment)) (:function extract-fields-from-relative-iri)) -(defrule iquery (and iquery-delim (* (or ipchar iprivate "/" "?"))) +(defrule iri-iquery (and iri-iquery-delim (* (or iri-ipchar + iri-iprivate + "/" + "?"))) (:function second) (:text t)) -(defrule ifragment (and ifragment-delim (* (or ipchar "/" "?"))) +(defrule iri-ifragment (and iri-ifragment-delim (* (or iri-ipchar "/" "?"))) (:function second) (:text t)) -(defrule iri-reference (or iri irelative-ref)) +(defrule iri-iri-reference (or iri-iri iri-irelative-ref)) (defclass iri (uri:uri) ()) @@ -269,7 +278,7 @@ (defun iri-parse (iri &key (null-on-error nil)) (handler-case - (let* ((parsed (parse 'iri-reference iri :junk-allowed nil)) + (let* ((parsed (parse 'iri-iri-reference iri :junk-allowed nil)) (res (mapcar (lambda (a) (cond ((typep a 'string) (if (text-utils:string-empty-p a)