1
0
Fork 0

- added IRI parser.

This commit is contained in:
cage 2020-12-13 15:27:55 +01:00
parent 5f8c9d422e
commit 13fdc439a8
6 changed files with 385 additions and 1 deletions

View File

@ -294,7 +294,9 @@
(when query
(setf uri (strcat uri "?" (percent-encode query))))
(cl+ssl:with-global-context (ctx :auto-free-p t)
(let ((socket (usocket:socket-connect host port :element-type '(unsigned-byte 8))))
(let ((socket (usocket:socket-connect host
port
:element-type '(unsigned-byte 8))))
(unwind-protect
(when socket
(let* ((stream (usocket:socket-stream socket))

297
src/iri-parser.lisp Normal file
View File

@ -0,0 +1,297 @@
;; tinmop: an humble gemini and pleroma client
;; Copyright (C) 2020 cage
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program.
;; If not, see [[http://www.gnu.org/licenses/][http://www.gnu.org/licenses/]].
(in-package :iri-parser)
(defrule alpha (character-ranges (#\a #\z) (#\A #\Z))
(:text t))
(defrule digit (character-ranges (#\0 #\9))
(:text t))
(defrule scheme-delim #\:
(:constant :scheme-delim))
(defrule iquery-delim #\?
(:constant :iquery-delim))
(defrule ifragment-delim #\#
(:constant :ifragment-delim))
(defrule port-delim #\:
(:constant :port-delim))
(defrule credential-delim #\@
(:constant :credential-delim))
(defrule iauthority-start "//"
(:constant :iauthority-start))
(defrule sub-delims (or #\! #\$ #\& #\' #\( #\) #\* #\+ #\, #\; #\=)
(:text t))
(defrule gen-delims (or ":" "?" "#" "[" "]" "@" "")
(:text t))
(defrule iunreserved-chars (or alpha digit #\- #\. #\_ #\~ ucschar)
(:text t))
(defrule iprivate (or (character-ranges (#\UE000 #\UF8FF))
(character-ranges (#\UF0000 #\UFFFFD))
(character-ranges (#\U100000 #\U10FFFD)))
(:text t))
(defrule ucschar (or (character-ranges (#\UA0 #\UD7FF))
(character-ranges (#\UF900 #\UFDCF))
(character-ranges (#\UFDF0 #\UFFEF))
(character-ranges (#\U10000 #\U1FFFD))
(character-ranges (#\U20000 #\U2FFFD))
(character-ranges (#\U30000 #\U3FFFD))
(character-ranges (#\U40000 #\U4FFFD))
(character-ranges (#\U50000 #\U5FFFD))
(character-ranges (#\U60000 #\U6FFFD))
(character-ranges (#\U70000 #\U7FFFD))
(character-ranges (#\U80000 #\U8FFFD))
(character-ranges (#\U90000 #\U9FFFD))
(character-ranges (#\UA0000 #\UAFFFD))
(character-ranges (#\UB0000 #\UBFFFD))
(character-ranges (#\UC0000 #\UCFFFD))
(character-ranges (#\UD0000 #\UDFFFD))
(character-ranges (#\UE1000 #\UEFFFD)))
(:text t))
(defrule reserved-chars (or gen-delims sub-delims)
(:text t))
(defrule scheme (and alpha (* (or alpha digit "+" "-" "." )))
(:text t))
(defrule ihier-part (and iauthority-start iauthority)
(:function second))
(defrule user-credentials (and iuserinfo credential-delim)
(:function first))
(defrule port-block (and port-delim port)
(:function second)
(:function parse-integer))
(defrule iauthority (and (? user-credentials)
ihost
(? port-block)))
(defrule ireg-name (* (or iunreserved-chars pct-encoded sub-delims ))
(:text t))
(defrule ihost (or ipv4-address ip-literal ireg-name)
(:text t))
(defrule port (+ digit)
(:text t))
(defrule iuserinfo (* (or iunreserved-chars pct-encoded sub-delims ":" ))
(:text t))
(defrule pct-encoded (and "%" hexdig hexdig)
(:text t))
(defrule hexdig (or (character-ranges #\a #\f) digit)
(:text t))
(defrule ipv4-address (and dec-octet "." dec-octet "." dec-octet "." dec-octet)
(:text t))
(defrule ip-literal (and "["
(+ (not (or "[" "]")))
"]")
(:text t))
(defrule ipchar (or iunreserved-chars pct-encoded sub-delims ":" "@")
(:text t))
(defrule isegment (* ipchar)
(:text t))
(defrule isegment-non-zero (+ ipchar)
(:text t))
(defrule isegment-nz-nc (+ (or iunreserved-chars pct-encoded sub-delims "@" ))
(:text t))
(defrule ipath-abempty (* (and "/" isegment))
(:text t))
(defrule ipath (or ipath-abempty)
(:text t))
(defrule ipath-absolute (and "/" (or isegment-nz (* (and "/" isegment ))))
(:text t))
(defrule ipath-rootless (and isegment-non-zero (* (and "/" isegment )))
(:text t))
(defrule ipath-noscheme (and isegment-nz-nc (* (and "/" isegment )))
(:text t))
(defrule ipath-empty ""
(:constant nil))
(defun octect-p (maybe-octect)
(ignore-errors
(let ((number (parse-integer (text-utils:strcat* maybe-octect))))
(when (<= 0 number 255)
number))))
(defrule dec-octet (octect-p (+ digit))
(:text t))
(defun extract-fields-from-absolute-iri (parsed)
(let ((authority (third parsed)))
(list (first parsed) ; scheme
(first authority) ; user-credentials
(second authority) ; host
(third authority) ; port
(fourth parsed) ; path
(fifth parsed) ; iquery
(sixth parsed)))) ; ifragment
(defrule iri (and scheme ":"
ihier-part
(or ipath-abempty
ipath-absolute
ipath-noscheme
ipath-empty)
(? iquery)
(? ifragment))
(:function extract-fields-from-absolute-iri))
(defrule irelative-part (and iauthority-start
iauthority
(or ipath-abempty
ipath-absolute
ipath-noscheme
ipath-empty))
(:function (lambda (a) (list (second a)
(third a)))))
(defun extract-fields-from-relative-iri (parsed)
(let ((authority (first (first parsed)))
(path (second (first parsed))))
(list nil ; scheme
(first authority) ; user-credentials
(second authority) ; host
(third authority) ; port
path
(second parsed) ; iquery
(third parsed)))) ;fragment))))
(defrule irelative-ref (and irelative-part (? iquery) (? ifragment))
(:function extract-fields-from-relative-iri))
(defrule iquery (and iquery-delim (* (or ipchar iprivate "/" "?")))
(:function second)
(:text t))
(defrule ifragment (and ifragment-delim (* (or ipchar "/" "?")))
(:function second)
(:text t))
(defrule iri-reference (or iri irelative-ref))
(defclass iri (uri-parser:uri) ())
(defmethod uri-parser:uri-host ((object iri))
(let ((host (slot-value object 'uri-host)))
(if (text-utils:string-starts-with-p "[" host)
(subseq host 1 (1- (length host)))
host)))
(defun make-iri (&optional scheme user-info host port path query fragment)
(make-instance 'iri
:scheme scheme
:user-info user-info
:host host
:port port
:path path
:query query
:fragment fragment))
(defun iri-parse (iri)
(let* ((parsed (parse 'iri-reference iri :junk-allowed nil))
(res (mapcar (lambda (a) (cond
((typep a 'string)
(if (text-utils:string-empty-p a)
nil
a))
(t a)))
(list (first parsed) ; scheme
(second parsed) ; user-credentials
(third parsed) ; host
(fourth parsed) ; port
(fifth parsed) ; path
(sixth parsed) ; query
(seventh parsed))))) ; fragment
(values (apply #'make-iri res)
res)))
(defun copy-iri (from)
(let ((scheme (uri:uri-scheme from))
(user-info (uri:uri-user-info from))
(host (slot-value from 'uri:uri-host))
(port (uri:uri-port from))
(path (uri:uri-path from))
(query (uri:uri-query from))
(fragment (uri:uri-fragment from)))
(make-iri scheme
user-info
host
port
path
query
fragment)))
(defun render-iri (iri &optional (stream *standard-output*))
(flet ((render ()
(with-output-to-string (string-stream)
(let ((scheme (uri:uri-scheme iri))
(user-info (uri:uri-user-info iri))
(host (slot-value iri 'uri-host))
(port (uri:uri-port iri))
(path (uri:uri-path iri))
(query (uri:uri-query iri))
(fragment (uri:uri-fragment iri)))
(when scheme
(format string-stream "~a:" scheme))
(write-string "//" string-stream)
(when user-info
(format string-stream "~a@" user-info))
(when host
(format string-stream "~a" host))
(when port
(format string-stream ":~a" port))
(when path
(format string-stream "~a" path))
(when query
(format string-stream "?~a" query))
(when fragment
(format string-stream "#~a" fragment))))))
(write-string (render) stream)))
(defmethod to-s ((object iri))
(with-output-to-string (stream)
(render-iri object stream)))

View File

@ -640,6 +640,21 @@
:normalize-path
:make-uri))
(defpackage :iri-parser
(:use
:cl
:alexandria
:esrap
:cl-ppcre
:text-utils)
(:nicknames :iri)
(:export
:iri
:copy-iri
:render-iri
:make-iri
:iri-parse))
(defpackage :x509
(:use
:cl

60
src/tests/iri-tests.lisp Normal file
View File

@ -0,0 +1,60 @@
;; tinmop: an humble gemini and pleroma client
;; Copyright (C) 2020 cage
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program.
;; If not, see [[http://www.gnu.org/licenses/][http://www.gnu.org/licenses/]].
(in-package :iri-tests)
(defsuite iri-suite (all-suite))
(defun test-iri (iri results)
(multiple-value-bind (x parsed)
(iri-parse iri)
(declare (ignore x))
(tree-equal (mapcar #'text-utils:to-s parsed) results :test #'string=)))
(defparameter *test-cases*
'(("file:///tmp/perché.txt" .
("file" nil nil nil "/tmp/perché.txt" nil nil))
("imap://mail.common-lisp.net/mbox1" .
("imap" nil "mail.common-lisp.net" nil "/mbox1" nil nil))
("mms://wms.つづく.sys.hinet.net/cts/Drama/09006251100.asf" .
("mms" nil "wms.つづく.sys.hinet.net" nil "/cts/Drama/09006251100.asf" nil nil))
("nfs://server/path/to/file.txt" .
("nfs" nil "server" nil "/path/to/file.txt" nil nil))
("svn+ssh://svn.èéçòìùzope.org/repos/main/ZConfig/trunk/" .
("svn+ssh" nil "svn.èéçòìùzope.org" nil "/repos/main/ZConfig/trunk/" nil nil))
("git+ssh://git@github.com/user/project.git" .
("git+ssh" "git" "github.com" nil "/user/project.git" nil nil))
("http://common-lisp.net" .
("http" nil "common-lisp.net" nil nil nil nil))
("http://common-lisp.net#abc" .
("http" nil "common-lisp.net" nil nil nil "abc"))
("http://common-lisp.net?q=abc" .
("http" nil "common-lisp.net" nil nil "q=abc" nil))
("http://common-lisp.net/#abc" .
("http" nil "common-lisp.net" nil "/" nil "abc"))
("http://a/b/c/d;p?q#f" .
("http" nil "a" nil "/b/c/d;p" "q" "f"))
("ldap://[2001:db8::7]/c=GB?objectClass?one" .
("ldap" nil "[2001:db8::7]" nil "/c=GB" "objectClass?one" nil))
("http://[dead:beef::]:111/foo/" .
("http" nil "[dead:beef::]" "111" "/foo/" nil nil))
("//foo.bar:198/".
(NIL NIL "foo.bar" "198" "/" NIL NIL))))
(deftest test-parsing (iri-suite)
(loop for (a . b) in *test-cases* do
(assert-true (test-iri a b) a)))

View File

@ -47,6 +47,14 @@
:all-tests)
(:export))
(defpackage :iri-tests
(:use :cl
:alexandria
:clunit
:iri
:all-tests)
(:export))
(defpackage :numeric-tests
(:use :cl
:clunit

View File

@ -75,6 +75,7 @@
(:file "queue")
(:file "stack")
(:file "uri-parser")
(:file "iri-parser")
(:file "x509-ffi")
(:file "x509")
(:file "api-pleroma-entities")
@ -130,6 +131,7 @@
(:file "misc-tests")
(:file "box-tests")
(:file "uri-tests")
(:file "iri-tests")
(:file "numeric-tests")
(:file "text-utils-tests")
(:file "mtree-tests")