1
0
Fork 0

- added 'list-id' as metadata for gemini list;

- added unit tests for gemini parser.
This commit is contained in:
cage 2022-12-30 15:03:59 +01:00
parent 3114264505
commit f1df366cba
3 changed files with 318 additions and 8 deletions

View File

@ -19,10 +19,14 @@
(defparameter *raw-mode-data* nil)
(defparameter *list-detected* nil)
(defparameter *parser-lock* (bt:make-recursive-lock))
(defparameter *pre-group-id* -1)
(defparameter *list-id* -1)
(defparameter *header-group-id* -1)
(defparameter *source-line-id* -1)
@ -30,11 +34,13 @@
(defparameter *pre-alt-text* "")
(defmacro with-initialized-parser (&body body)
`(let ((gemini-parser:*pre-group-id* -1)
(gemini-parser:*header-group-id* -1)
(gemini-parser:*source-line-id* -1)
(gemini-parser:*pre-alt-text* "")
(gemini-parser:*raw-mode-data* nil))
`(let ((*pre-group-id* -1)
(*header-group-id* -1)
(*source-line-id* -1)
(*list-id* -1)
(*pre-alt-text* "")
(*list-detected* nil)
(*raw-mode-data* nil))
,@body))
(defun-w-lock next-pre-group-id () *parser-lock*
@ -64,6 +70,13 @@
(defun-w-lock current-source-line-id () *parser-lock*
*source-line-id*)
(defun-w-lock next-list-id () *parser-lock*
(incf *list-id*)
*list-id*)
(defun-w-lock current-list-id () *parser-lock*
*list-id*)
(defparameter *omitted-port* +gemini-default-port+)
(define-constant +h1-prefix+ "#" :test #'string=)
@ -745,6 +758,11 @@
(defun add-metadata-to-parsed-gemini-lines (parsed-gemini &key (initialize-parser nil))
(labels ((trim (a)
(trim-blanks a))
(unset-list-mode ()
(setf *list-detected* nil))
(set-list-mode ()
(next-list-id)
(setf *list-detected* t))
(pre-alt-text (node)
(trim (html-utils:attribute-value (html-utils:find-attribute :alt node))))
(add-attribute (node key value)
@ -757,10 +775,13 @@
(add-attribute node :pre-alt-text alt-text))
(add-pre-group-id (node group-id)
(add-attribute node :pre-group-id group-id))
(add-list-id (node list-id)
(add-attribute node :list-id list-id))
(build-attributes (node)
(let ((source-line-id (next-source-line-id))
(res (cond
((null node)
(unset-list-mode)
(html-utils:make-tag-node :vertical-space nil nil))
((html-utils:tag= :as-is node)
(add-pre-group-id (add-pre-alt-text (add-header-group-id node
@ -768,19 +789,26 @@
(current-pre-alt-text))
(current-pre-group-id)))
((html-utils:tag= :text node)
(unset-list-mode)
(add-header-group-id node
(current-header-group-id)))
((or (html-utils:tag= :h1 node)
(html-utils:tag= :h2 node)
(html-utils:tag= :h3 node))
(unset-list-mode)
(add-header-group-id node (next-header-group-id)))
((html-utils:tag= :li node)
(add-header-group-id node
(current-header-group-id)))
(when (not *list-detected*)
(set-list-mode))
(add-list-id (add-header-group-id node
(current-header-group-id))
(current-list-id)))
((html-utils:tag= :quote node)
(unset-list-mode)
(add-header-group-id node
(current-header-group-id)))
((html-utils:tag= :pre node)
(unset-list-mode)
(let ((current-alt-text (pre-alt-text node)))
(set-pre-alt-text current-alt-text)
(add-pre-group-id (add-pre-alt-text (add-header-group-id node
@ -790,6 +818,7 @@
((html-utils:tag= :pre-end node)
node)
((html-utils:tag= :a node)
(unset-list-mode)
(add-header-group-id node
(current-header-group-id))))))
(add-source-id res source-line-id)))

View File

@ -98,8 +98,8 @@
:link-text
:simple-line
:text-line
:add-metadata-to-parsed-gemini-lines
:sexp->text-rows
:sexp->text
:parse-gemini-response-header
:gemini-iri-p
:gemini-first-h1

View File

@ -18,3 +18,284 @@
(in-package :gemini-parser-tests)
(defsuite gemini-parser-suite (all-suite))
(defparameter *gemini-stream*
"# header-1
``` preformatted-1-alt
preformatted-line-1-1
preformatted-line-1-2
preformatted-line-1-3
```
# header-2
## header-3
* list-a
* list-b
foo
``` preformatted-2-alt
preformatted-line-2-1
preformatted-line-2-2
preformatted-line-2-3
```
bar
* list -c
baz
> quote line
> quote line-2
")
(defun make-threads-parse (function)
(let* ((threads (loop repeat 10
collect
(progn
(sleep .01)
(bt:make-thread function))))
(results (loop for thread in threads
collect
(bt:join-thread thread))))
(elt results (random (length results)))))
(defun parse-stream ()
(make-threads-parse (lambda () (gemini-parser:parse-gemini-file *gemini-stream*
:initialize-parser t))))
(deftest parse-test (gemini-parser-suite)
(assert-equality (lambda (a b) (tree-equal a b :test #'string=))
'((:h1
((:source-line "# header-1
"))
" header-1")
(:pre
((:source-line "``` preformatted-1-alt
")
(:alt " preformatted-1-alt")))
(:as-is
((:source-line "preformatted-line-1-1
")
(:alt " preformatted-1-alt"))
"preformatted-line-1-1")
(:as-is
((:source-line "preformatted-line-1-2
")
(:alt " preformatted-1-alt"))
"preformatted-line-1-2")
(:as-is
((:source-line "preformatted-line-1-3
")
(:alt " preformatted-1-alt"))
"preformatted-line-1-3")
(:pre-end
((:source-line "```
")
(:alt " preformatted-1-alt")))
(:h1
((:source-line "# header-2
"))
" header-2")
nil
(:h2
((:source-line "## header-3
"))
" header-3")
nil
(:li
((:source-line "* list-a
"))
"list-a")
(:li
((:source-line "* list-b
"))
"list-b")
nil
(:text
((:source-line "foo
"))
"foo")
(:pre
((:source-line "``` preformatted-2-alt
")
(:alt " preformatted-2-alt")))
(:as-is
((:source-line "preformatted-line-2-1
")
(:alt " preformatted-2-alt"))
"preformatted-line-2-1")
(:as-is
((:source-line "preformatted-line-2-2
")
(:alt " preformatted-2-alt"))
"preformatted-line-2-2")
(:as-is
((:source-line "preformatted-line-2-3
")
(:alt " preformatted-2-alt"))
"preformatted-line-2-3")
(:pre-end
((:source-line "```
")
(:alt " preformatted-2-alt")))
nil
(:text
((:source-line "bar
"))
"bar")
(:li
((:source-line "* list -c
"))
"list -c")
(:text
((:source-line "baz
"))
"baz")
nil
(:quote
((:source-line "> quote line
"))
" quote line")
(:quote
((:source-line "> quote line-2
"))
" quote line-2"))
(parse-stream)))
(defun annotate-stream ()
(make-threads-parse (lambda ()
(gemini-parser:with-initialized-parser
(gemini-parser:add-metadata-to-parsed-gemini-lines
(gemini-parser:parse-gemini-file *gemini-stream*))))))
(deftest annotate-test ((gemini-parser-suite) (parse-test))
(assert-equality (lambda (a b) (tree-equal a b
:test (lambda (a b)
(if (numberp a)
(= a b)
(string= a b)))))
'((:h1
((:source-id 0) (:header-group-id 0)
(:source-line "# header-1
"))
" header-1")
(:pre
((:source-id 1) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
(:header-group-id 0)
(:source-line "``` preformatted-1-alt
")
(:alt " preformatted-1-alt")))
(:as-is
((:source-id 2) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
(:header-group-id 0)
(:source-line "preformatted-line-1-1
")
(:alt " preformatted-1-alt"))
"preformatted-line-1-1")
(:as-is
((:source-id 3) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
(:header-group-id 0)
(:source-line "preformatted-line-1-2
")
(:alt " preformatted-1-alt"))
"preformatted-line-1-2")
(:as-is
((:source-id 4) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt")
(:header-group-id 0)
(:source-line "preformatted-line-1-3
")
(:alt " preformatted-1-alt"))
"preformatted-line-1-3")
(:pre-end
((:source-id 5)
(:source-line "```
")
(:alt " preformatted-1-alt")))
(:h1
((:source-id 6) (:header-group-id 1)
(:source-line "# header-2
"))
" header-2")
(:vertical-space ((:source-id 7)))
(:h2
((:source-id 8) (:header-group-id 2)
(:source-line "## header-3
"))
" header-3")
(:vertical-space ((:source-id 9)))
(:li
((:source-id 10) (:list-id 0) (:header-group-id 2)
(:source-line "* list-a
"))
"list-a")
(:li
((:source-id 11) (:list-id 0) (:header-group-id 2)
(:source-line "* list-b
"))
"list-b")
(:vertical-space ((:source-id 12)))
(:text
((:source-id 13) (:header-group-id 2)
(:source-line "foo
"))
"foo")
(:pre
((:source-id 14) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
(:header-group-id 2)
(:source-line "``` preformatted-2-alt
")
(:alt " preformatted-2-alt")))
(:as-is
((:source-id 15) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
(:header-group-id 2)
(:source-line "preformatted-line-2-1
")
(:alt " preformatted-2-alt"))
"preformatted-line-2-1")
(:as-is
((:source-id 16) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
(:header-group-id 2)
(:source-line "preformatted-line-2-2
")
(:alt " preformatted-2-alt"))
"preformatted-line-2-2")
(:as-is
((:source-id 17) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt")
(:header-group-id 2)
(:source-line "preformatted-line-2-3
")
(:alt " preformatted-2-alt"))
"preformatted-line-2-3")
(:pre-end
((:source-id 18)
(:source-line "```
")
(:alt " preformatted-2-alt")))
(:vertical-space ((:source-id 19)))
(:text
((:source-id 20) (:header-group-id 2)
(:source-line "bar
"))
"bar")
(:li
((:source-id 21) (:list-id 1) (:header-group-id 2)
(:source-line "* list -c
"))
"list -c")
(:text
((:source-id 22) (:header-group-id 2)
(:source-line "baz
"))
"baz")
(:vertical-space ((:source-id 23)))
(:quote
((:source-id 24) (:header-group-id 2)
(:source-line "> quote line
"))
" quote line")
(:quote
((:source-id 25) (:header-group-id 2)
(:source-line "> quote line-2
"))
" quote line-2"))
(annotate-stream)))