mirror of https://codeberg.org/cage/tinmop/
- added 'list-id' as metadata for gemini list;
- added unit tests for gemini parser.
This commit is contained in:
parent
3114264505
commit
f1df366cba
|
@ -19,10 +19,14 @@
|
||||||
|
|
||||||
(defparameter *raw-mode-data* nil)
|
(defparameter *raw-mode-data* nil)
|
||||||
|
|
||||||
|
(defparameter *list-detected* nil)
|
||||||
|
|
||||||
(defparameter *parser-lock* (bt:make-recursive-lock))
|
(defparameter *parser-lock* (bt:make-recursive-lock))
|
||||||
|
|
||||||
(defparameter *pre-group-id* -1)
|
(defparameter *pre-group-id* -1)
|
||||||
|
|
||||||
|
(defparameter *list-id* -1)
|
||||||
|
|
||||||
(defparameter *header-group-id* -1)
|
(defparameter *header-group-id* -1)
|
||||||
|
|
||||||
(defparameter *source-line-id* -1)
|
(defparameter *source-line-id* -1)
|
||||||
|
@ -30,11 +34,13 @@
|
||||||
(defparameter *pre-alt-text* "")
|
(defparameter *pre-alt-text* "")
|
||||||
|
|
||||||
(defmacro with-initialized-parser (&body body)
|
(defmacro with-initialized-parser (&body body)
|
||||||
`(let ((gemini-parser:*pre-group-id* -1)
|
`(let ((*pre-group-id* -1)
|
||||||
(gemini-parser:*header-group-id* -1)
|
(*header-group-id* -1)
|
||||||
(gemini-parser:*source-line-id* -1)
|
(*source-line-id* -1)
|
||||||
(gemini-parser:*pre-alt-text* "")
|
(*list-id* -1)
|
||||||
(gemini-parser:*raw-mode-data* nil))
|
(*pre-alt-text* "")
|
||||||
|
(*list-detected* nil)
|
||||||
|
(*raw-mode-data* nil))
|
||||||
,@body))
|
,@body))
|
||||||
|
|
||||||
(defun-w-lock next-pre-group-id () *parser-lock*
|
(defun-w-lock next-pre-group-id () *parser-lock*
|
||||||
|
@ -64,6 +70,13 @@
|
||||||
(defun-w-lock current-source-line-id () *parser-lock*
|
(defun-w-lock current-source-line-id () *parser-lock*
|
||||||
*source-line-id*)
|
*source-line-id*)
|
||||||
|
|
||||||
|
(defun-w-lock next-list-id () *parser-lock*
|
||||||
|
(incf *list-id*)
|
||||||
|
*list-id*)
|
||||||
|
|
||||||
|
(defun-w-lock current-list-id () *parser-lock*
|
||||||
|
*list-id*)
|
||||||
|
|
||||||
(defparameter *omitted-port* +gemini-default-port+)
|
(defparameter *omitted-port* +gemini-default-port+)
|
||||||
|
|
||||||
(define-constant +h1-prefix+ "#" :test #'string=)
|
(define-constant +h1-prefix+ "#" :test #'string=)
|
||||||
|
@ -745,6 +758,11 @@
|
||||||
(defun add-metadata-to-parsed-gemini-lines (parsed-gemini &key (initialize-parser nil))
|
(defun add-metadata-to-parsed-gemini-lines (parsed-gemini &key (initialize-parser nil))
|
||||||
(labels ((trim (a)
|
(labels ((trim (a)
|
||||||
(trim-blanks a))
|
(trim-blanks a))
|
||||||
|
(unset-list-mode ()
|
||||||
|
(setf *list-detected* nil))
|
||||||
|
(set-list-mode ()
|
||||||
|
(next-list-id)
|
||||||
|
(setf *list-detected* t))
|
||||||
(pre-alt-text (node)
|
(pre-alt-text (node)
|
||||||
(trim (html-utils:attribute-value (html-utils:find-attribute :alt node))))
|
(trim (html-utils:attribute-value (html-utils:find-attribute :alt node))))
|
||||||
(add-attribute (node key value)
|
(add-attribute (node key value)
|
||||||
|
@ -757,10 +775,13 @@
|
||||||
(add-attribute node :pre-alt-text alt-text))
|
(add-attribute node :pre-alt-text alt-text))
|
||||||
(add-pre-group-id (node group-id)
|
(add-pre-group-id (node group-id)
|
||||||
(add-attribute node :pre-group-id group-id))
|
(add-attribute node :pre-group-id group-id))
|
||||||
|
(add-list-id (node list-id)
|
||||||
|
(add-attribute node :list-id list-id))
|
||||||
(build-attributes (node)
|
(build-attributes (node)
|
||||||
(let ((source-line-id (next-source-line-id))
|
(let ((source-line-id (next-source-line-id))
|
||||||
(res (cond
|
(res (cond
|
||||||
((null node)
|
((null node)
|
||||||
|
(unset-list-mode)
|
||||||
(html-utils:make-tag-node :vertical-space nil nil))
|
(html-utils:make-tag-node :vertical-space nil nil))
|
||||||
((html-utils:tag= :as-is node)
|
((html-utils:tag= :as-is node)
|
||||||
(add-pre-group-id (add-pre-alt-text (add-header-group-id node
|
(add-pre-group-id (add-pre-alt-text (add-header-group-id node
|
||||||
|
@ -768,19 +789,26 @@
|
||||||
(current-pre-alt-text))
|
(current-pre-alt-text))
|
||||||
(current-pre-group-id)))
|
(current-pre-group-id)))
|
||||||
((html-utils:tag= :text node)
|
((html-utils:tag= :text node)
|
||||||
|
(unset-list-mode)
|
||||||
(add-header-group-id node
|
(add-header-group-id node
|
||||||
(current-header-group-id)))
|
(current-header-group-id)))
|
||||||
((or (html-utils:tag= :h1 node)
|
((or (html-utils:tag= :h1 node)
|
||||||
(html-utils:tag= :h2 node)
|
(html-utils:tag= :h2 node)
|
||||||
(html-utils:tag= :h3 node))
|
(html-utils:tag= :h3 node))
|
||||||
|
(unset-list-mode)
|
||||||
(add-header-group-id node (next-header-group-id)))
|
(add-header-group-id node (next-header-group-id)))
|
||||||
((html-utils:tag= :li node)
|
((html-utils:tag= :li node)
|
||||||
(add-header-group-id node
|
(when (not *list-detected*)
|
||||||
(current-header-group-id)))
|
(set-list-mode))
|
||||||
|
(add-list-id (add-header-group-id node
|
||||||
|
(current-header-group-id))
|
||||||
|
(current-list-id)))
|
||||||
((html-utils:tag= :quote node)
|
((html-utils:tag= :quote node)
|
||||||
|
(unset-list-mode)
|
||||||
(add-header-group-id node
|
(add-header-group-id node
|
||||||
(current-header-group-id)))
|
(current-header-group-id)))
|
||||||
((html-utils:tag= :pre node)
|
((html-utils:tag= :pre node)
|
||||||
|
(unset-list-mode)
|
||||||
(let ((current-alt-text (pre-alt-text node)))
|
(let ((current-alt-text (pre-alt-text node)))
|
||||||
(set-pre-alt-text current-alt-text)
|
(set-pre-alt-text current-alt-text)
|
||||||
(add-pre-group-id (add-pre-alt-text (add-header-group-id node
|
(add-pre-group-id (add-pre-alt-text (add-header-group-id node
|
||||||
|
@ -790,6 +818,7 @@
|
||||||
((html-utils:tag= :pre-end node)
|
((html-utils:tag= :pre-end node)
|
||||||
node)
|
node)
|
||||||
((html-utils:tag= :a node)
|
((html-utils:tag= :a node)
|
||||||
|
(unset-list-mode)
|
||||||
(add-header-group-id node
|
(add-header-group-id node
|
||||||
(current-header-group-id))))))
|
(current-header-group-id))))))
|
||||||
(add-source-id res source-line-id)))
|
(add-source-id res source-line-id)))
|
||||||
|
|
|
@ -98,8 +98,8 @@
|
||||||
:link-text
|
:link-text
|
||||||
:simple-line
|
:simple-line
|
||||||
:text-line
|
:text-line
|
||||||
|
:add-metadata-to-parsed-gemini-lines
|
||||||
:sexp->text-rows
|
:sexp->text-rows
|
||||||
:sexp->text
|
|
||||||
:parse-gemini-response-header
|
:parse-gemini-response-header
|
||||||
:gemini-iri-p
|
:gemini-iri-p
|
||||||
:gemini-first-h1
|
:gemini-first-h1
|
||||||
|
|
|
@ -18,3 +18,284 @@
|
||||||
(in-package :gemini-parser-tests)
|
(in-package :gemini-parser-tests)
|
||||||
|
|
||||||
(defsuite gemini-parser-suite (all-suite))
|
(defsuite gemini-parser-suite (all-suite))
|
||||||
|
|
||||||
|
(defparameter *gemini-stream*
|
||||||
|
"# header-1
|
||||||
|
``` preformatted-1-alt
|
||||||
|
preformatted-line-1-1
|
||||||
|
preformatted-line-1-2
|
||||||
|
preformatted-line-1-3
|
||||||
|
```
|
||||||
|
# header-2
|
||||||
|
|
||||||
|
## header-3
|
||||||
|
|
||||||
|
* list-a
|
||||||
|
* list-b
|
||||||
|
|
||||||
|
foo
|
||||||
|
``` preformatted-2-alt
|
||||||
|
preformatted-line-2-1
|
||||||
|
preformatted-line-2-2
|
||||||
|
preformatted-line-2-3
|
||||||
|
```
|
||||||
|
|
||||||
|
bar
|
||||||
|
* list -c
|
||||||
|
baz
|
||||||
|
|
||||||
|
> quote line
|
||||||
|
> quote line-2
|
||||||
|
")
|
||||||
|
|
||||||
|
(defun make-threads-parse (function)
|
||||||
|
(let* ((threads (loop repeat 10
|
||||||
|
collect
|
||||||
|
(progn
|
||||||
|
(sleep .01)
|
||||||
|
(bt:make-thread function))))
|
||||||
|
(results (loop for thread in threads
|
||||||
|
collect
|
||||||
|
(bt:join-thread thread))))
|
||||||
|
(elt results (random (length results)))))
|
||||||
|
|
||||||
|
(defun parse-stream ()
|
||||||
|
(make-threads-parse (lambda () (gemini-parser:parse-gemini-file *gemini-stream*
|
||||||
|
:initialize-parser t))))
|
||||||
|
|
||||||
|
(deftest parse-test (gemini-parser-suite)
|
||||||
|
(assert-equality (lambda (a b) (tree-equal a b :test #'string=))
|
||||||
|
'((:h1
|
||||||
|
((:source-line "# header-1
|
||||||
|
"))
|
||||||
|
" header-1")
|
||||||
|
(:pre
|
||||||
|
((:source-line "``` preformatted-1-alt
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt")))
|
||||||
|
(:as-is
|
||||||
|
((:source-line "preformatted-line-1-1
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt"))
|
||||||
|
"preformatted-line-1-1")
|
||||||
|
(:as-is
|
||||||
|
((:source-line "preformatted-line-1-2
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt"))
|
||||||
|
"preformatted-line-1-2")
|
||||||
|
(:as-is
|
||||||
|
((:source-line "preformatted-line-1-3
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt"))
|
||||||
|
"preformatted-line-1-3")
|
||||||
|
(:pre-end
|
||||||
|
((:source-line "```
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt")))
|
||||||
|
(:h1
|
||||||
|
((:source-line "# header-2
|
||||||
|
"))
|
||||||
|
" header-2")
|
||||||
|
nil
|
||||||
|
(:h2
|
||||||
|
((:source-line "## header-3
|
||||||
|
"))
|
||||||
|
" header-3")
|
||||||
|
nil
|
||||||
|
(:li
|
||||||
|
((:source-line "* list-a
|
||||||
|
"))
|
||||||
|
"list-a")
|
||||||
|
(:li
|
||||||
|
((:source-line "* list-b
|
||||||
|
"))
|
||||||
|
"list-b")
|
||||||
|
nil
|
||||||
|
(:text
|
||||||
|
((:source-line "foo
|
||||||
|
"))
|
||||||
|
"foo")
|
||||||
|
(:pre
|
||||||
|
((:source-line "``` preformatted-2-alt
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt")))
|
||||||
|
(:as-is
|
||||||
|
((:source-line "preformatted-line-2-1
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt"))
|
||||||
|
"preformatted-line-2-1")
|
||||||
|
(:as-is
|
||||||
|
((:source-line "preformatted-line-2-2
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt"))
|
||||||
|
"preformatted-line-2-2")
|
||||||
|
(:as-is
|
||||||
|
((:source-line "preformatted-line-2-3
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt"))
|
||||||
|
"preformatted-line-2-3")
|
||||||
|
(:pre-end
|
||||||
|
((:source-line "```
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt")))
|
||||||
|
nil
|
||||||
|
(:text
|
||||||
|
((:source-line "bar
|
||||||
|
"))
|
||||||
|
"bar")
|
||||||
|
(:li
|
||||||
|
((:source-line "* list -c
|
||||||
|
"))
|
||||||
|
"list -c")
|
||||||
|
(:text
|
||||||
|
((:source-line "baz
|
||||||
|
"))
|
||||||
|
"baz")
|
||||||
|
nil
|
||||||
|
(:quote
|
||||||
|
((:source-line "> quote line
|
||||||
|
"))
|
||||||
|
" quote line")
|
||||||
|
(:quote
|
||||||
|
((:source-line "> quote line-2
|
||||||
|
"))
|
||||||
|
" quote line-2"))
|
||||||
|
(parse-stream)))
|
||||||
|
|
||||||
|
(defun annotate-stream ()
|
||||||
|
(make-threads-parse (lambda ()
|
||||||
|
(gemini-parser:with-initialized-parser
|
||||||
|
(gemini-parser:add-metadata-to-parsed-gemini-lines
|
||||||
|
(gemini-parser:parse-gemini-file *gemini-stream*))))))
|
||||||
|
|
||||||
|
(deftest annotate-test ((gemini-parser-suite) (parse-test))
|
||||||
|
(assert-equality (lambda (a b) (tree-equal a b
|
||||||
|
:test (lambda (a b)
|
||||||
|
(if (numberp a)
|
||||||
|
(= a b)
|
||||||
|
(string= a b)))))
|
||||||
|
'((:h1
|
||||||
|
((:source-id 0) (:header-group-id 0)
|
||||||
|
(:source-line "# header-1
|
||||||
|
"))
|
||||||
|
" header-1")
|
||||||
|
(:pre
|
||||||
|
((:source-id 1) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
|
||||||
|
(:header-group-id 0)
|
||||||
|
(:source-line "``` preformatted-1-alt
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt")))
|
||||||
|
(:as-is
|
||||||
|
((:source-id 2) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
|
||||||
|
(:header-group-id 0)
|
||||||
|
(:source-line "preformatted-line-1-1
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt"))
|
||||||
|
"preformatted-line-1-1")
|
||||||
|
(:as-is
|
||||||
|
((:source-id 3) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
|
||||||
|
(:header-group-id 0)
|
||||||
|
(:source-line "preformatted-line-1-2
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt"))
|
||||||
|
"preformatted-line-1-2")
|
||||||
|
(:as-is
|
||||||
|
((:source-id 4) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
|
||||||
|
(:header-group-id 0)
|
||||||
|
(:source-line "preformatted-line-1-3
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt"))
|
||||||
|
"preformatted-line-1-3")
|
||||||
|
(:pre-end
|
||||||
|
((:source-id 5)
|
||||||
|
(:source-line "```
|
||||||
|
")
|
||||||
|
(:alt " preformatted-1-alt")))
|
||||||
|
(:h1
|
||||||
|
((:source-id 6) (:header-group-id 1)
|
||||||
|
(:source-line "# header-2
|
||||||
|
"))
|
||||||
|
" header-2")
|
||||||
|
(:vertical-space ((:source-id 7)))
|
||||||
|
(:h2
|
||||||
|
((:source-id 8) (:header-group-id 2)
|
||||||
|
(:source-line "## header-3
|
||||||
|
"))
|
||||||
|
" header-3")
|
||||||
|
(:vertical-space ((:source-id 9)))
|
||||||
|
(:li
|
||||||
|
((:source-id 10) (:list-id 0) (:header-group-id 2)
|
||||||
|
(:source-line "* list-a
|
||||||
|
"))
|
||||||
|
"list-a")
|
||||||
|
(:li
|
||||||
|
((:source-id 11) (:list-id 0) (:header-group-id 2)
|
||||||
|
(:source-line "* list-b
|
||||||
|
"))
|
||||||
|
"list-b")
|
||||||
|
(:vertical-space ((:source-id 12)))
|
||||||
|
(:text
|
||||||
|
((:source-id 13) (:header-group-id 2)
|
||||||
|
(:source-line "foo
|
||||||
|
"))
|
||||||
|
"foo")
|
||||||
|
(:pre
|
||||||
|
((:source-id 14) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
|
||||||
|
(:header-group-id 2)
|
||||||
|
(:source-line "``` preformatted-2-alt
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt")))
|
||||||
|
(:as-is
|
||||||
|
((:source-id 15) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
|
||||||
|
(:header-group-id 2)
|
||||||
|
(:source-line "preformatted-line-2-1
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt"))
|
||||||
|
"preformatted-line-2-1")
|
||||||
|
(:as-is
|
||||||
|
((:source-id 16) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
|
||||||
|
(:header-group-id 2)
|
||||||
|
(:source-line "preformatted-line-2-2
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt"))
|
||||||
|
"preformatted-line-2-2")
|
||||||
|
(:as-is
|
||||||
|
((:source-id 17) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
|
||||||
|
(:header-group-id 2)
|
||||||
|
(:source-line "preformatted-line-2-3
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt"))
|
||||||
|
"preformatted-line-2-3")
|
||||||
|
(:pre-end
|
||||||
|
((:source-id 18)
|
||||||
|
(:source-line "```
|
||||||
|
")
|
||||||
|
(:alt " preformatted-2-alt")))
|
||||||
|
(:vertical-space ((:source-id 19)))
|
||||||
|
(:text
|
||||||
|
((:source-id 20) (:header-group-id 2)
|
||||||
|
(:source-line "bar
|
||||||
|
"))
|
||||||
|
"bar")
|
||||||
|
(:li
|
||||||
|
((:source-id 21) (:list-id 1) (:header-group-id 2)
|
||||||
|
(:source-line "* list -c
|
||||||
|
"))
|
||||||
|
"list -c")
|
||||||
|
(:text
|
||||||
|
((:source-id 22) (:header-group-id 2)
|
||||||
|
(:source-line "baz
|
||||||
|
"))
|
||||||
|
"baz")
|
||||||
|
(:vertical-space ((:source-id 23)))
|
||||||
|
(:quote
|
||||||
|
((:source-id 24) (:header-group-id 2)
|
||||||
|
(:source-line "> quote line
|
||||||
|
"))
|
||||||
|
" quote line")
|
||||||
|
(:quote
|
||||||
|
((:source-id 25) (:header-group-id 2)
|
||||||
|
(:source-line "> quote line-2
|
||||||
|
"))
|
||||||
|
" quote line-2"))
|
||||||
|
(annotate-stream)))
|
||||||
|
|
Loading…
Reference in New Issue