2020-12-13 15:27:55 +01:00
|
|
|
;; tinmop: an humble gemini and pleroma client
|
|
|
|
;; Copyright (C) 2020 cage
|
|
|
|
|
|
|
|
;; This program is free software: you can redistribute it and/or modify
|
|
|
|
;; it under the terms of the GNU General Public License as published by
|
|
|
|
;; the Free Software Foundation, either version 3 of the License, or
|
|
|
|
;; (at your option) any later version.
|
|
|
|
|
|
|
|
;; This program is distributed in the hope that it will be useful,
|
|
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
;; GNU General Public License for more details.
|
|
|
|
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
|
|
|
;; along with this program.
|
|
|
|
;; If not, see [[http://www.gnu.org/licenses/][http://www.gnu.org/licenses/]].
|
|
|
|
|
|
|
|
(in-package :iri-parser)
|
|
|
|
|
2021-03-27 09:19:13 +01:00
|
|
|
(define-constant +segment-separator+ "/" :test #'string=)
|
|
|
|
|
2020-12-13 15:27:55 +01:00
|
|
|
(defrule alpha (character-ranges (#\a #\z) (#\A #\Z))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule digit (character-ranges (#\0 #\9))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule scheme-delim #\:
|
|
|
|
(:constant :scheme-delim))
|
|
|
|
|
|
|
|
(defrule iquery-delim #\?
|
|
|
|
(:constant :iquery-delim))
|
|
|
|
|
|
|
|
(defrule ifragment-delim #\#
|
|
|
|
(:constant :ifragment-delim))
|
|
|
|
|
|
|
|
(defrule port-delim #\:
|
|
|
|
(:constant :port-delim))
|
|
|
|
|
|
|
|
(defrule credential-delim #\@
|
|
|
|
(:constant :credential-delim))
|
|
|
|
|
|
|
|
(defrule iauthority-start "//"
|
|
|
|
(:constant :iauthority-start))
|
|
|
|
|
|
|
|
(defrule sub-delims (or #\! #\$ #\& #\' #\( #\) #\* #\+ #\, #\; #\=)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule gen-delims (or ":" "?" "#" "[" "]" "@" "")
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule iunreserved-chars (or alpha digit #\- #\. #\_ #\~ ucschar)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule iprivate (or (character-ranges (#\UE000 #\UF8FF))
|
|
|
|
(character-ranges (#\UF0000 #\UFFFFD))
|
|
|
|
(character-ranges (#\U100000 #\U10FFFD)))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
|
|
|
|
(defrule ucschar (or (character-ranges (#\UA0 #\UD7FF))
|
|
|
|
(character-ranges (#\UF900 #\UFDCF))
|
|
|
|
(character-ranges (#\UFDF0 #\UFFEF))
|
|
|
|
(character-ranges (#\U10000 #\U1FFFD))
|
|
|
|
(character-ranges (#\U20000 #\U2FFFD))
|
|
|
|
(character-ranges (#\U30000 #\U3FFFD))
|
|
|
|
(character-ranges (#\U40000 #\U4FFFD))
|
|
|
|
(character-ranges (#\U50000 #\U5FFFD))
|
|
|
|
(character-ranges (#\U60000 #\U6FFFD))
|
|
|
|
(character-ranges (#\U70000 #\U7FFFD))
|
|
|
|
(character-ranges (#\U80000 #\U8FFFD))
|
|
|
|
(character-ranges (#\U90000 #\U9FFFD))
|
|
|
|
(character-ranges (#\UA0000 #\UAFFFD))
|
|
|
|
(character-ranges (#\UB0000 #\UBFFFD))
|
|
|
|
(character-ranges (#\UC0000 #\UCFFFD))
|
|
|
|
(character-ranges (#\UD0000 #\UDFFFD))
|
|
|
|
(character-ranges (#\UE1000 #\UEFFFD)))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule reserved-chars (or gen-delims sub-delims)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule scheme (and alpha (* (or alpha digit "+" "-" "." )))
|
|
|
|
(:text t))
|
|
|
|
|
2021-04-01 20:33:29 +02:00
|
|
|
(defrule ihier-part (or (and iauthority-start iauthority ipath-abempty)
|
|
|
|
ipath-absolute ; text
|
|
|
|
ipath-rootless ;text
|
|
|
|
ipath-empty)) ;text
|
2020-12-13 15:27:55 +01:00
|
|
|
|
|
|
|
(defrule user-credentials (and iuserinfo credential-delim)
|
|
|
|
(:function first))
|
|
|
|
|
|
|
|
(defrule port-block (and port-delim port)
|
|
|
|
(:function second)
|
|
|
|
(:function parse-integer))
|
|
|
|
|
|
|
|
(defrule iauthority (and (? user-credentials)
|
|
|
|
ihost
|
|
|
|
(? port-block)))
|
|
|
|
|
|
|
|
(defrule ireg-name (* (or iunreserved-chars pct-encoded sub-delims ))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ihost (or ipv4-address ip-literal ireg-name)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule port (+ digit)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule iuserinfo (* (or iunreserved-chars pct-encoded sub-delims ":" ))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule pct-encoded (and "%" hexdig hexdig)
|
|
|
|
(:text t))
|
|
|
|
|
2021-03-15 19:28:07 +01:00
|
|
|
(defrule hexdig (or (character-ranges (#\a #\f))
|
|
|
|
(character-ranges (#\A #\F))
|
|
|
|
digit)
|
2020-12-13 15:27:55 +01:00
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ipv4-address (and dec-octet "." dec-octet "." dec-octet "." dec-octet)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ip-literal (and "["
|
|
|
|
(+ (not (or "[" "]")))
|
|
|
|
"]")
|
2021-04-16 14:15:41 +02:00
|
|
|
(:function (lambda (a) (text (second a)))))
|
2020-12-13 15:27:55 +01:00
|
|
|
|
|
|
|
(defrule ipchar (or iunreserved-chars pct-encoded sub-delims ":" "@")
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule isegment (* ipchar)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule isegment-non-zero (+ ipchar)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule isegment-nz-nc (+ (or iunreserved-chars pct-encoded sub-delims "@" ))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ipath-abempty (* (and "/" isegment))
|
|
|
|
(:text t))
|
|
|
|
|
2020-12-25 15:03:39 +01:00
|
|
|
(defrule ipath (or ipath-abempty
|
|
|
|
ipath-absolute
|
|
|
|
ipath-noscheme
|
|
|
|
ipath-rootless
|
|
|
|
ipath-empty)
|
2020-12-13 15:27:55 +01:00
|
|
|
(:text t))
|
|
|
|
|
2020-12-25 15:03:39 +01:00
|
|
|
(defrule ipath-absolute (and "/" (? (and isegment-non-zero (* (and "/" isegment )))))
|
2020-12-13 15:27:55 +01:00
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ipath-rootless (and isegment-non-zero (* (and "/" isegment )))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ipath-noscheme (and isegment-nz-nc (* (and "/" isegment )))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ipath-empty ""
|
|
|
|
(:constant nil))
|
|
|
|
|
|
|
|
(defun octect-p (maybe-octect)
|
|
|
|
(ignore-errors
|
|
|
|
(let ((number (parse-integer (text-utils:strcat* maybe-octect))))
|
|
|
|
(when (<= 0 number 255)
|
|
|
|
number))))
|
|
|
|
|
|
|
|
(defrule dec-octet (octect-p (+ digit))
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defun extract-fields-from-absolute-iri (parsed)
|
2021-04-01 20:33:29 +02:00
|
|
|
(let* ((scheme (first parsed))
|
|
|
|
(ihier-part (third parsed))
|
|
|
|
(authority (when (consp ihier-part)
|
|
|
|
(second ihier-part)))
|
|
|
|
(user-credentials (first authority))
|
|
|
|
(host (second authority))
|
|
|
|
(port (third authority))
|
|
|
|
(ipath (if (consp ihier-part)
|
|
|
|
(third ihier-part)
|
|
|
|
ihier-part))
|
|
|
|
(iquery (fourth parsed))
|
|
|
|
(ifragment (fifth parsed)))
|
|
|
|
(list scheme
|
|
|
|
user-credentials
|
|
|
|
host
|
|
|
|
port
|
|
|
|
ipath
|
|
|
|
iquery
|
|
|
|
ifragment)))
|
2020-12-13 15:27:55 +01:00
|
|
|
|
|
|
|
(defrule iri (and scheme ":"
|
|
|
|
ihier-part
|
|
|
|
(? iquery)
|
|
|
|
(? ifragment))
|
|
|
|
(:function extract-fields-from-absolute-iri))
|
|
|
|
|
2020-12-25 15:03:39 +01:00
|
|
|
(defrule irelative-part (or (and iauthority-start
|
|
|
|
iauthority
|
|
|
|
ipath-abempty)
|
|
|
|
ipath-absolute
|
|
|
|
ipath-noscheme
|
|
|
|
ipath-empty))
|
|
|
|
|
|
|
|
(defun extract-fields-from-relative-iri-w-authority (parsed)
|
|
|
|
;; ((:IAUTHORITY-START (NIL "bar.baz" NIL) "/foo.gmi") "a=b" "afrag")
|
|
|
|
(let ((authority (second (first parsed)))
|
|
|
|
(path (third (first parsed))))
|
2020-12-13 15:27:55 +01:00
|
|
|
(list nil ; scheme
|
|
|
|
(first authority) ; user-credentials
|
|
|
|
(second authority) ; host
|
|
|
|
(third authority) ; port
|
|
|
|
path
|
|
|
|
(second parsed) ; iquery
|
2020-12-25 15:03:39 +01:00
|
|
|
(third parsed)))) ; fragment
|
|
|
|
|
|
|
|
(defun extract-fields-from-relative-iri-w/o-authority (parsed)
|
|
|
|
(list nil ; scheme
|
|
|
|
nil ; user-credentials
|
|
|
|
nil ; host
|
|
|
|
nil ; port
|
|
|
|
(first parsed) ; path
|
|
|
|
(second parsed) ; iquery
|
|
|
|
(third parsed))) ; fragment
|
|
|
|
|
|
|
|
(defun extract-fields-from-relative-iri (parsed)
|
|
|
|
(if (consp (first parsed))
|
|
|
|
(extract-fields-from-relative-iri-w-authority parsed)
|
|
|
|
(extract-fields-from-relative-iri-w/o-authority parsed)))
|
2020-12-13 15:27:55 +01:00
|
|
|
|
|
|
|
(defrule irelative-ref (and irelative-part (? iquery) (? ifragment))
|
|
|
|
(:function extract-fields-from-relative-iri))
|
|
|
|
|
|
|
|
(defrule iquery (and iquery-delim (* (or ipchar iprivate "/" "?")))
|
|
|
|
(:function second)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule ifragment (and ifragment-delim (* (or ipchar "/" "?")))
|
|
|
|
(:function second)
|
|
|
|
(:text t))
|
|
|
|
|
|
|
|
(defrule iri-reference (or iri irelative-ref))
|
|
|
|
|
2020-12-13 16:27:22 +01:00
|
|
|
(defclass iri (uri:uri) ())
|
2020-12-13 15:27:55 +01:00
|
|
|
|
2021-12-04 14:33:53 +01:00
|
|
|
(defmethod print-object ((object iri) stream)
|
|
|
|
(print-unreadable-object (object stream)
|
|
|
|
(format stream
|
|
|
|
"~s ~s ~s ~s ~s ~s ~s"
|
|
|
|
(uri:scheme object)
|
|
|
|
(uri:user-info object)
|
|
|
|
(uri:host object)
|
|
|
|
(uri:port object)
|
|
|
|
(uri:path object)
|
|
|
|
(uri:query object)
|
|
|
|
(uri:fragment object))))
|
|
|
|
|
2020-12-13 15:27:55 +01:00
|
|
|
(defun make-iri (&optional scheme user-info host port path query fragment)
|
|
|
|
(make-instance 'iri
|
|
|
|
:scheme scheme
|
|
|
|
:user-info user-info
|
|
|
|
:host host
|
|
|
|
:port port
|
|
|
|
:path path
|
|
|
|
:query query
|
|
|
|
:fragment fragment))
|
|
|
|
|
2021-03-27 09:19:13 +01:00
|
|
|
(defun iri-parse (iri &key (null-on-error nil))
|
|
|
|
(handler-case
|
|
|
|
(let* ((parsed (parse 'iri-reference iri :junk-allowed nil))
|
|
|
|
(res (mapcar (lambda (a) (cond
|
|
|
|
((typep a 'string)
|
|
|
|
(if (text-utils:string-empty-p a)
|
|
|
|
nil
|
|
|
|
a))
|
|
|
|
(t a)))
|
|
|
|
(list (first parsed) ; scheme
|
|
|
|
(second parsed) ; user-credentials
|
|
|
|
(third parsed) ; host
|
|
|
|
(fourth parsed) ; port
|
|
|
|
(fifth parsed) ; path
|
|
|
|
(sixth parsed) ; query
|
|
|
|
(seventh parsed))))) ; fragment
|
|
|
|
(values (apply #'make-iri res)
|
|
|
|
res))
|
|
|
|
(esrap:esrap-parse-error (e)
|
|
|
|
(if null-on-error
|
|
|
|
nil
|
|
|
|
(error e)))))
|
2020-12-13 15:27:55 +01:00
|
|
|
|
|
|
|
(defun copy-iri (from)
|
2020-12-13 15:29:04 +01:00
|
|
|
(let ((scheme (uri:scheme from))
|
|
|
|
(user-info (uri:user-info from))
|
|
|
|
(host (slot-value from 'uri:host))
|
|
|
|
(port (uri:port from))
|
|
|
|
(path (uri:path from))
|
|
|
|
(query (uri:query from))
|
|
|
|
(fragment (uri:fragment from)))
|
2020-12-13 15:27:55 +01:00
|
|
|
(make-iri scheme
|
|
|
|
user-info
|
|
|
|
host
|
|
|
|
port
|
|
|
|
path
|
|
|
|
query
|
|
|
|
fragment)))
|
|
|
|
|
2022-01-28 12:24:24 +01:00
|
|
|
(defmethod normalize-path ((object iri))
|
|
|
|
(let ((clean-path (fs:normalize-path (uri:path object)))
|
2020-12-13 16:27:22 +01:00
|
|
|
(copy (copy-iri object)))
|
|
|
|
(when clean-path
|
|
|
|
(setf (uri:path copy) clean-path))
|
|
|
|
copy))
|
|
|
|
|
2022-01-28 12:24:24 +01:00
|
|
|
(defmethod normalize-path ((object uri:uri))
|
|
|
|
(let ((clean-path (fs:normalize-path (uri:path object)))
|
|
|
|
(copy (uri:copy-uri object)))
|
|
|
|
(when clean-path
|
|
|
|
(setf (uri:path copy) clean-path))
|
|
|
|
copy))
|
|
|
|
|
2020-12-13 15:27:55 +01:00
|
|
|
(defun render-iri (iri &optional (stream *standard-output*))
|
|
|
|
(flet ((render ()
|
|
|
|
(with-output-to-string (string-stream)
|
2020-12-13 15:29:04 +01:00
|
|
|
(let ((scheme (uri:scheme iri))
|
|
|
|
(user-info (uri:user-info iri))
|
2020-12-14 13:57:29 +01:00
|
|
|
(host (slot-value iri 'uri:host))
|
2020-12-13 15:29:04 +01:00
|
|
|
(port (uri:port iri))
|
|
|
|
(path (uri:path iri))
|
|
|
|
(query (uri:query iri))
|
|
|
|
(fragment (uri:fragment iri)))
|
2020-12-13 15:27:55 +01:00
|
|
|
(when scheme
|
|
|
|
(format string-stream "~a:" scheme))
|
|
|
|
(write-string "//" string-stream)
|
|
|
|
(when user-info
|
|
|
|
(format string-stream "~a@" user-info))
|
|
|
|
(when host
|
|
|
|
(format string-stream "~a" host))
|
|
|
|
(when port
|
|
|
|
(format string-stream ":~a" port))
|
|
|
|
(when path
|
|
|
|
(format string-stream "~a" path))
|
|
|
|
(when query
|
|
|
|
(format string-stream "?~a" query))
|
|
|
|
(when fragment
|
|
|
|
(format string-stream "#~a" fragment))))))
|
|
|
|
(write-string (render) stream)))
|
|
|
|
|
2022-07-02 10:55:11 +02:00
|
|
|
(defmethod to-s ((object iri) &key &allow-other-keys)
|
2020-12-13 15:27:55 +01:00
|
|
|
(with-output-to-string (stream)
|
|
|
|
(render-iri object stream)))
|
2021-03-27 09:19:13 +01:00
|
|
|
|
|
|
|
(defun absolute-url-p (url)
|
|
|
|
(when-let ((iri (iri:iri-parse url :null-on-error t)))
|
2022-10-02 14:19:50 +02:00
|
|
|
(not (or (null (uri:scheme iri))
|
|
|
|
(null (uri:host iri))))))
|
2021-04-16 14:44:22 +02:00
|
|
|
|
|
|
|
(defun ipv4-address-p (string)
|
|
|
|
(ignore-errors
|
|
|
|
(let ((bytes (mapcar #'parse-integer
|
|
|
|
(cl-ppcre:split "\\."
|
|
|
|
string))))
|
|
|
|
(and (= (length bytes)
|
|
|
|
4)
|
|
|
|
(every (lambda (a) (<= 0 a 255)) bytes)))))
|
|
|
|
|
|
|
|
(defun ipv6-address-p (string)
|
|
|
|
(cl-ppcre:scan ":" string))
|
2023-04-06 15:06:31 +02:00
|
|
|
|
|
|
|
(defun iri-to-parent-path (iri)
|
|
|
|
(let* ((parsed-iri (iri:iri-parse iri))
|
|
|
|
(parent-path (fs:parent-dir-path (uri:path parsed-iri)))
|
|
|
|
(new-iri (to-s (make-instance 'iri:iri
|
|
|
|
:scheme (uri:scheme parsed-iri)
|
|
|
|
:host (uri:host parsed-iri)
|
|
|
|
:user-info (uri:user-info parsed-iri)
|
|
|
|
:port (uri:port parsed-iri)
|
|
|
|
:path parent-path))))
|
|
|
|
new-iri))
|