1
0
Fork 0

- prefixed all parsing rule for iri parsing to avoid clash with other rules in the code.

This commit is contained in:
cage 2024-06-21 18:09:45 +02:00
parent 81bf22b110
commit a14372b095
1 changed files with 75 additions and 66 deletions

View File

@ -19,46 +19,46 @@
(define-constant +segment-separator+ "/" :test #'string=)
(defrule alpha (character-ranges (#\a #\z) (#\A #\Z))
(defrule iri-alpha (character-ranges (#\a #\z) (#\A #\Z))
(:text t))
(defrule digit (character-ranges (#\0 #\9))
(defrule iri-digit (character-ranges (#\0 #\9))
(:text t))
(defrule scheme-delim #\:
(defrule iri-scheme-delim #\:
(:constant :scheme-delim))
(defrule iquery-delim #\?
(defrule iri-iquery-delim #\?
(:constant :iquery-delim))
(defrule ifragment-delim #\#
(defrule iri-ifragment-delim #\#
(:constant :ifragment-delim))
(defrule port-delim #\:
(defrule iri-port-delim #\:
(:constant :port-delim))
(defrule credential-delim #\@
(defrule iri-credential-delim #\@
(:constant :credential-delim))
(defrule iauthority-start "//"
(defrule iri-iauthority-start "//"
(:constant :iauthority-start))
(defrule sub-delims (or #\! #\$ #\& #\' #\( #\) #\* #\+ #\, #\; #\=)
(defrule iri-sub-delims (or #\! #\$ #\& #\' #\( #\) #\* #\+ #\, #\; #\=)
(:text t))
(defrule gen-delims (or ":" "?" "#" "[" "]" "@" "")
(defrule iri-gen-delims (or ":" "?" "#" "[" "]" "@" "")
(:text t))
(defrule iunreserved-chars (or alpha digit #\- #\. #\_ #\~ ucschar)
(defrule iri-iunreserved-chars (or iri-alpha iri-digit #\- #\. #\_ #\~ iri-ucschar)
(:text t))
(defrule iprivate (or (character-ranges (#\UE000 #\UF8FF))
(character-ranges (#\UF0000 #\UFFFFD))
(character-ranges (#\U100000 #\U10FFFD)))
(defrule iri-iprivate (or (character-ranges (#\UE000 #\UF8FF))
(character-ranges (#\UF0000 #\UFFFFD))
(character-ranges (#\U100000 #\U10FFFD)))
(:text t))
(defrule ucschar (or (character-ranges (#\UA0 #\UD7FF))
(defrule iri-ucschar (or (character-ranges (#\UA0 #\UD7FF))
(character-ranges (#\UF900 #\UFDCF))
(character-ranges (#\UFDF0 #\UFFEF))
(character-ranges (#\U10000 #\U1FFFD))
@ -77,88 +77,94 @@
(character-ranges (#\UE1000 #\UEFFFD)))
(:text t))
(defrule reserved-chars (or gen-delims sub-delims)
(defrule iri-reserved-chars (or iri-gen-delims iri-sub-delims)
(:text t))
(defrule scheme (and alpha (* (or alpha digit "+" "-" "." )))
(defrule iri-scheme (and iri-alpha (* (or iri-alpha iri-digit "+" "-" "." )))
(:text t))
(defrule ihier-part (or (and iauthority-start iauthority ipath-abempty)
ipath-absolute ; text
ipath-rootless ;text
ipath-empty)) ;text
(defrule iri-ihier-part (or (and iri-iauthority-start iri-iauthority iri-ipath-abempty)
iri-ipath-absolute ; text
iri-ipath-rootless ;text
iri-ipath-empty)) ;text
(defrule user-credentials (and iuserinfo credential-delim)
(defrule iri-user-credentials (and iri-iuserinfo iri-credential-delim)
(:function first))
(defrule port-block (and port-delim port)
(defrule iri-port-block (and iri-port-delim iri-port)
(:function second)
(:function parse-integer))
(defrule iauthority (and (? user-credentials)
ihost
(? port-block)))
(defrule iri-iauthority (and (? iri-user-credentials)
iri-ihost
(? iri-port-block)))
(defrule ireg-name (* (or iunreserved-chars pct-encoded sub-delims ))
(defrule iri-ireg-name (* (or iri-iunreserved-chars iri-pct-encoded iri-sub-delims ))
(:text t))
(defrule ihost (or ipv4-address ip-literal ireg-name)
(defrule iri-ihost (or iri-ipv4-address iri-ip-literal iri-ireg-name)
(:text t))
(defrule port (+ digit)
(defrule iri-port (+ iri-digit)
(:text t))
(defrule iuserinfo (* (or iunreserved-chars pct-encoded sub-delims ":" ))
(defrule iri-iuserinfo (* (or iri-iunreserved-chars iri-pct-encoded iri-sub-delims ":" ))
(:text t))
(defrule pct-encoded (and "%" hexdig hexdig)
(defrule iri-pct-encoded (and "%" iri-hexdig iri-hexdig)
(:text t))
(defrule hexdig (or (character-ranges (#\a #\f))
(character-ranges (#\A #\F))
digit)
(defrule iri-hexdig (or (character-ranges (#\a #\f))
(character-ranges (#\A #\F))
iri-digit)
(:text t))
(defrule ipv4-address (and dec-octet "." dec-octet "." dec-octet "." dec-octet)
(defrule iri-ipv4-address (and iri-dec-octet "."
iri-dec-octet "."
iri-dec-octet "."
iri-dec-octet)
(:text t))
(defrule ip-literal (and "["
(defrule iri-ip-literal (and "["
(+ (not (or "[" "]")))
"]")
(:function (lambda (a) (text (second a)))))
(defrule ipchar (or iunreserved-chars pct-encoded sub-delims ":" "@")
(defrule iri-ipchar (or iri-iunreserved-chars iri-pct-encoded iri-sub-delims ":" "@")
(:text t))
(defrule isegment (* ipchar)
(defrule iri-isegment (* iri-ipchar)
(:text t))
(defrule isegment-non-zero (+ ipchar)
(defrule iri-isegment-non-zero (+ iri-ipchar)
(:text t))
(defrule isegment-nz-nc (+ (or iunreserved-chars pct-encoded sub-delims "@" ))
(defrule iri-isegment-nz-nc (+ (or iri-iunreserved-chars
iri-pct-encoded
iri-sub-delims "@" ))
(:text t))
(defrule ipath-abempty (* (and "/" isegment))
(defrule iri-ipath-abempty (* (and "/" iri-isegment))
(:text t))
(defrule ipath (or ipath-abempty
ipath-absolute
ipath-noscheme
ipath-rootless
ipath-empty)
(defrule iri-ipath (or iri-ipath-abempty
iri-ipath-absolute
iri-ipath-noscheme
iri-ipath-rootless
iri-ipath-empty)
(:text t))
(defrule ipath-absolute (and "/" (? (and isegment-non-zero (* (and "/" isegment )))))
(defrule iri-ipath-absolute (and "/" (? (and iri-isegment-non-zero (* (and "/"
iri-isegment )))))
(:text t))
(defrule ipath-rootless (and isegment-non-zero (* (and "/" isegment )))
(defrule iri-ipath-rootless (and iri-isegment-non-zero (* (and "/" iri-isegment )))
(:text t))
(defrule ipath-noscheme (and isegment-nz-nc (* (and "/" isegment )))
(defrule iri-ipath-noscheme (and iri-isegment-nz-nc (* (and "/" iri-isegment )))
(:text t))
(defrule ipath-empty ""
(defrule iri-ipath-empty ""
(:constant nil))
(defun octect-p (maybe-octect)
@ -167,7 +173,7 @@
(when (<= 0 number 255)
number))))
(defrule dec-octet (octect-p (+ digit))
(defrule iri-dec-octet (octect-p (+ digit))
(:text t))
(defun extract-fields-from-absolute-iri (parsed)
@ -191,18 +197,18 @@
iquery
ifragment)))
(defrule iri (and scheme ":"
ihier-part
(? iquery)
(? ifragment))
(defrule iri-iri (and iri-scheme ":"
iri-ihier-part
(? iri-iquery)
(? iri-ifragment))
(:function extract-fields-from-absolute-iri))
(defrule irelative-part (or (and iauthority-start
iauthority
ipath-abempty)
ipath-absolute
ipath-noscheme
ipath-empty))
(defrule iri-irelative-part (or (and iri-iauthority-start
iri-iauthority
iri-ipath-abempty)
iri-ipath-absolute
iri-ipath-noscheme
iri-ipath-empty))
(defun extract-fields-from-relative-iri-w-authority (parsed)
;; ((:IAUTHORITY-START (NIL "bar.baz" NIL) "/foo.gmi") "a=b" "afrag")
@ -230,18 +236,21 @@
(extract-fields-from-relative-iri-w-authority parsed)
(extract-fields-from-relative-iri-w/o-authority parsed)))
(defrule irelative-ref (and irelative-part (? iquery) (? ifragment))
(defrule iri-irelative-ref (and iri-irelative-part (? iri-iquery) (? iri-ifragment))
(:function extract-fields-from-relative-iri))
(defrule iquery (and iquery-delim (* (or ipchar iprivate "/" "?")))
(defrule iri-iquery (and iri-iquery-delim (* (or iri-ipchar
iri-iprivate
"/"
"?")))
(:function second)
(:text t))
(defrule ifragment (and ifragment-delim (* (or ipchar "/" "?")))
(defrule iri-ifragment (and iri-ifragment-delim (* (or iri-ipchar "/" "?")))
(:function second)
(:text t))
(defrule iri-reference (or iri irelative-ref))
(defrule iri-iri-reference (or iri-iri iri-irelative-ref))
(defclass iri (uri:uri) ())
@ -269,7 +278,7 @@
(defun iri-parse (iri &key (null-on-error nil))
(handler-case
(let* ((parsed (parse 'iri-reference iri :junk-allowed nil))
(let* ((parsed (parse 'iri-iri-reference iri :junk-allowed nil))
(res (mapcar (lambda (a) (cond
((typep a 'string)
(if (text-utils:string-empty-p a)