1
0
Fork 0
tinmop/src/iri-parser.lisp

381 lines
13 KiB
Common Lisp
Raw Normal View History

2020-12-13 15:27:55 +01:00
;; tinmop: an humble gemini and pleroma client
;; Copyright (C) 2020 cage
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program.
;; If not, see [[http://www.gnu.org/licenses/][http://www.gnu.org/licenses/]].
(in-package :iri-parser)
(define-constant +segment-separator+ "/" :test #'string=)
2020-12-13 15:27:55 +01:00
(defrule alpha (character-ranges (#\a #\z) (#\A #\Z))
(:text t))
(defrule digit (character-ranges (#\0 #\9))
(:text t))
(defrule scheme-delim #\:
(:constant :scheme-delim))
(defrule iquery-delim #\?
(:constant :iquery-delim))
(defrule ifragment-delim #\#
(:constant :ifragment-delim))
(defrule port-delim #\:
(:constant :port-delim))
(defrule credential-delim #\@
(:constant :credential-delim))
(defrule iauthority-start "//"
(:constant :iauthority-start))
(defrule sub-delims (or #\! #\$ #\& #\' #\( #\) #\* #\+ #\, #\; #\=)
(:text t))
(defrule gen-delims (or ":" "?" "#" "[" "]" "@" "")
(:text t))
(defrule iunreserved-chars (or alpha digit #\- #\. #\_ #\~ ucschar)
(:text t))
(defrule iprivate (or (character-ranges (#\UE000 #\UF8FF))
(character-ranges (#\UF0000 #\UFFFFD))
(character-ranges (#\U100000 #\U10FFFD)))
(:text t))
(defrule ucschar (or (character-ranges (#\UA0 #\UD7FF))
(character-ranges (#\UF900 #\UFDCF))
(character-ranges (#\UFDF0 #\UFFEF))
(character-ranges (#\U10000 #\U1FFFD))
(character-ranges (#\U20000 #\U2FFFD))
(character-ranges (#\U30000 #\U3FFFD))
(character-ranges (#\U40000 #\U4FFFD))
(character-ranges (#\U50000 #\U5FFFD))
(character-ranges (#\U60000 #\U6FFFD))
(character-ranges (#\U70000 #\U7FFFD))
(character-ranges (#\U80000 #\U8FFFD))
(character-ranges (#\U90000 #\U9FFFD))
(character-ranges (#\UA0000 #\UAFFFD))
(character-ranges (#\UB0000 #\UBFFFD))
(character-ranges (#\UC0000 #\UCFFFD))
(character-ranges (#\UD0000 #\UDFFFD))
(character-ranges (#\UE1000 #\UEFFFD)))
(:text t))
(defrule reserved-chars (or gen-delims sub-delims)
(:text t))
(defrule scheme (and alpha (* (or alpha digit "+" "-" "." )))
(:text t))
(defrule ihier-part (or (and iauthority-start iauthority ipath-abempty)
ipath-absolute ; text
ipath-rootless ;text
ipath-empty)) ;text
2020-12-13 15:27:55 +01:00
(defrule user-credentials (and iuserinfo credential-delim)
(:function first))
(defrule port-block (and port-delim port)
(:function second)
(:function parse-integer))
(defrule iauthority (and (? user-credentials)
ihost
(? port-block)))
(defrule ireg-name (* (or iunreserved-chars pct-encoded sub-delims ))
(:text t))
(defrule ihost (or ipv4-address ip-literal ireg-name)
(:text t))
(defrule port (+ digit)
(:text t))
(defrule iuserinfo (* (or iunreserved-chars pct-encoded sub-delims ":" ))
(:text t))
(defrule pct-encoded (and "%" hexdig hexdig)
(:text t))
(defrule hexdig (or (character-ranges (#\a #\f))
(character-ranges (#\A #\F))
digit)
2020-12-13 15:27:55 +01:00
(:text t))
(defrule ipv4-address (and dec-octet "." dec-octet "." dec-octet "." dec-octet)
(:text t))
(defrule ip-literal (and "["
(+ (not (or "[" "]")))
"]")
(:function (lambda (a) (text (second a)))))
2020-12-13 15:27:55 +01:00
(defrule ipchar (or iunreserved-chars pct-encoded sub-delims ":" "@")
(:text t))
(defrule isegment (* ipchar)
(:text t))
(defrule isegment-non-zero (+ ipchar)
(:text t))
(defrule isegment-nz-nc (+ (or iunreserved-chars pct-encoded sub-delims "@" ))
(:text t))
(defrule ipath-abempty (* (and "/" isegment))
(:text t))
(defrule ipath (or ipath-abempty
ipath-absolute
ipath-noscheme
ipath-rootless
ipath-empty)
2020-12-13 15:27:55 +01:00
(:text t))
(defrule ipath-absolute (and "/" (? (and isegment-non-zero (* (and "/" isegment )))))
2020-12-13 15:27:55 +01:00
(:text t))
(defrule ipath-rootless (and isegment-non-zero (* (and "/" isegment )))
(:text t))
(defrule ipath-noscheme (and isegment-nz-nc (* (and "/" isegment )))
(:text t))
(defrule ipath-empty ""
(:constant nil))
(defun octect-p (maybe-octect)
(ignore-errors
(let ((number (parse-integer (text-utils:strcat* maybe-octect))))
(when (<= 0 number 255)
number))))
(defrule dec-octet (octect-p (+ digit))
(:text t))
(defun extract-fields-from-absolute-iri (parsed)
(let* ((scheme (first parsed))
(ihier-part (third parsed))
(authority (when (consp ihier-part)
(second ihier-part)))
(user-credentials (first authority))
(host (second authority))
(port (third authority))
(ipath (if (consp ihier-part)
(third ihier-part)
ihier-part))
(iquery (fourth parsed))
(ifragment (fifth parsed)))
(list scheme
user-credentials
host
port
ipath
iquery
ifragment)))
2020-12-13 15:27:55 +01:00
(defrule iri (and scheme ":"
ihier-part
(? iquery)
(? ifragment))
(:function extract-fields-from-absolute-iri))
(defrule irelative-part (or (and iauthority-start
iauthority
ipath-abempty)
ipath-absolute
ipath-noscheme
ipath-empty))
(defun extract-fields-from-relative-iri-w-authority (parsed)
;; ((:IAUTHORITY-START (NIL "bar.baz" NIL) "/foo.gmi") "a=b" "afrag")
(let ((authority (second (first parsed)))
(path (third (first parsed))))
2020-12-13 15:27:55 +01:00
(list nil ; scheme
(first authority) ; user-credentials
(second authority) ; host
(third authority) ; port
path
(second parsed) ; iquery
(third parsed)))) ; fragment
(defun extract-fields-from-relative-iri-w/o-authority (parsed)
(list nil ; scheme
nil ; user-credentials
nil ; host
nil ; port
(first parsed) ; path
(second parsed) ; iquery
(third parsed))) ; fragment
(defun extract-fields-from-relative-iri (parsed)
(if (consp (first parsed))
(extract-fields-from-relative-iri-w-authority parsed)
(extract-fields-from-relative-iri-w/o-authority parsed)))
2020-12-13 15:27:55 +01:00
(defrule irelative-ref (and irelative-part (? iquery) (? ifragment))
(:function extract-fields-from-relative-iri))
(defrule iquery (and iquery-delim (* (or ipchar iprivate "/" "?")))
(:function second)
(:text t))
(defrule ifragment (and ifragment-delim (* (or ipchar "/" "?")))
(:function second)
(:text t))
(defrule iri-reference (or iri irelative-ref))
(defclass iri (uri:uri) ())
2020-12-13 15:27:55 +01:00
(defmethod print-object ((object iri) stream)
(print-unreadable-object (object stream)
(format stream
"~s ~s ~s ~s ~s ~s ~s"
(uri:scheme object)
(uri:user-info object)
(uri:host object)
(uri:port object)
(uri:path object)
(uri:query object)
(uri:fragment object))))
2020-12-13 15:27:55 +01:00
(defun make-iri (&optional scheme user-info host port path query fragment)
(make-instance 'iri
:scheme scheme
:user-info user-info
:host host
:port port
:path path
:query query
:fragment fragment))
(defun iri-parse (iri &key (null-on-error nil))
(handler-case
(let* ((parsed (parse 'iri-reference iri :junk-allowed nil))
(res (mapcar (lambda (a) (cond
((typep a 'string)
(if (text-utils:string-empty-p a)
nil
a))
(t a)))
(list (first parsed) ; scheme
(second parsed) ; user-credentials
(third parsed) ; host
(fourth parsed) ; port
(fifth parsed) ; path
(sixth parsed) ; query
(seventh parsed))))) ; fragment
(values (apply #'make-iri res)
res))
(esrap:esrap-parse-error (e)
(if null-on-error
nil
(error e)))))
2020-12-13 15:27:55 +01:00
(defun copy-iri (from)
(let ((scheme (uri:scheme from))
(user-info (uri:user-info from))
(host (slot-value from 'uri:host))
(port (uri:port from))
(path (uri:path from))
(query (uri:query from))
(fragment (uri:fragment from)))
2020-12-13 15:27:55 +01:00
(make-iri scheme
user-info
host
port
path
query
fragment)))
(defmethod normalize-path ((object iri))
(let ((clean-path (fs:normalize-path (uri:path object)))
(copy (copy-iri object)))
(when clean-path
(setf (uri:path copy) clean-path))
copy))
(defmethod normalize-path ((object uri:uri))
(let ((clean-path (fs:normalize-path (uri:path object)))
(copy (uri:copy-uri object)))
(when clean-path
(setf (uri:path copy) clean-path))
copy))
2020-12-13 15:27:55 +01:00
(defun render-iri (iri &optional (stream *standard-output*))
(flet ((render ()
(with-output-to-string (string-stream)
(let ((scheme (uri:scheme iri))
(user-info (uri:user-info iri))
(host (slot-value iri 'uri:host))
(port (uri:port iri))
(path (uri:path iri))
(query (uri:query iri))
(fragment (uri:fragment iri)))
2020-12-13 15:27:55 +01:00
(when scheme
(format string-stream "~a:" scheme))
(write-string "//" string-stream)
(when user-info
(format string-stream "~a@" user-info))
(when host
(format string-stream "~a" host))
(when port
(format string-stream ":~a" port))
(when path
(format string-stream "~a" path))
(when query
(format string-stream "?~a" query))
(when fragment
(format string-stream "#~a" fragment))))))
(write-string (render) stream)))
(defmethod to-s ((object iri) &key &allow-other-keys)
2020-12-13 15:27:55 +01:00
(with-output-to-string (stream)
(render-iri object stream)))
(defun absolute-url-p (url)
(when-let ((iri (iri:iri-parse url :null-on-error t)))
(not (or (null (uri:scheme iri))
(null (uri:host iri))))))
(defun ipv4-address-p (string)
(ignore-errors
(let ((bytes (mapcar #'parse-integer
(cl-ppcre:split "\\."
string))))
(and (= (length bytes)
4)
(every (lambda (a) (<= 0 a 255)) bytes)))))
(defun ipv6-address-p (string)
(cl-ppcre:scan ":" string))
(defun iri-to-parent-path (iri)
(let* ((parsed-iri (iri:iri-parse iri))
(parent-path (fs:parent-dir-path (uri:path parsed-iri)))
(new-iri (to-s (make-instance 'iri:iri
:scheme (uri:scheme parsed-iri)
:host (uri:host parsed-iri)
:user-info (uri:user-info parsed-iri)
:port (uri:port parsed-iri)
:path parent-path))))
new-iri))