From f1df366cba610c0869191cc6d6df19eb8315d2c3 Mon Sep 17 00:00:00 2001 From: cage Date: Fri, 30 Dec 2022 15:03:59 +0100 Subject: [PATCH] - added 'list-id' as metadata for gemini list; - added unit tests for gemini parser. --- src/gemini/gemini-parser.lisp | 43 ++++- src/gemini/package.lisp | 2 +- src/tests/gemini-parser-tests.lisp | 281 +++++++++++++++++++++++++++++ 3 files changed, 318 insertions(+), 8 deletions(-) diff --git a/src/gemini/gemini-parser.lisp b/src/gemini/gemini-parser.lisp index ddc8dc2..4016b3f 100644 --- a/src/gemini/gemini-parser.lisp +++ b/src/gemini/gemini-parser.lisp @@ -19,10 +19,14 @@ (defparameter *raw-mode-data* nil) +(defparameter *list-detected* nil) + (defparameter *parser-lock* (bt:make-recursive-lock)) (defparameter *pre-group-id* -1) +(defparameter *list-id* -1) + (defparameter *header-group-id* -1) (defparameter *source-line-id* -1) @@ -30,11 +34,13 @@ (defparameter *pre-alt-text* "") (defmacro with-initialized-parser (&body body) - `(let ((gemini-parser:*pre-group-id* -1) - (gemini-parser:*header-group-id* -1) - (gemini-parser:*source-line-id* -1) - (gemini-parser:*pre-alt-text* "") - (gemini-parser:*raw-mode-data* nil)) + `(let ((*pre-group-id* -1) + (*header-group-id* -1) + (*source-line-id* -1) + (*list-id* -1) + (*pre-alt-text* "") + (*list-detected* nil) + (*raw-mode-data* nil)) ,@body)) (defun-w-lock next-pre-group-id () *parser-lock* @@ -64,6 +70,13 @@ (defun-w-lock current-source-line-id () *parser-lock* *source-line-id*) +(defun-w-lock next-list-id () *parser-lock* + (incf *list-id*) + *list-id*) + +(defun-w-lock current-list-id () *parser-lock* + *list-id*) + (defparameter *omitted-port* +gemini-default-port+) (define-constant +h1-prefix+ "#" :test #'string=) @@ -745,6 +758,11 @@ (defun add-metadata-to-parsed-gemini-lines (parsed-gemini &key (initialize-parser nil)) (labels ((trim (a) (trim-blanks a)) + (unset-list-mode () + (setf *list-detected* nil)) + (set-list-mode () + (next-list-id) + (setf *list-detected* t)) (pre-alt-text (node) (trim (html-utils:attribute-value (html-utils:find-attribute :alt node)))) (add-attribute (node key value) @@ -757,10 +775,13 @@ (add-attribute node :pre-alt-text alt-text)) (add-pre-group-id (node group-id) (add-attribute node :pre-group-id group-id)) + (add-list-id (node list-id) + (add-attribute node :list-id list-id)) (build-attributes (node) (let ((source-line-id (next-source-line-id)) (res (cond ((null node) + (unset-list-mode) (html-utils:make-tag-node :vertical-space nil nil)) ((html-utils:tag= :as-is node) (add-pre-group-id (add-pre-alt-text (add-header-group-id node @@ -768,19 +789,26 @@ (current-pre-alt-text)) (current-pre-group-id))) ((html-utils:tag= :text node) + (unset-list-mode) (add-header-group-id node (current-header-group-id))) ((or (html-utils:tag= :h1 node) (html-utils:tag= :h2 node) (html-utils:tag= :h3 node)) + (unset-list-mode) (add-header-group-id node (next-header-group-id))) ((html-utils:tag= :li node) - (add-header-group-id node - (current-header-group-id))) + (when (not *list-detected*) + (set-list-mode)) + (add-list-id (add-header-group-id node + (current-header-group-id)) + (current-list-id))) ((html-utils:tag= :quote node) + (unset-list-mode) (add-header-group-id node (current-header-group-id))) ((html-utils:tag= :pre node) + (unset-list-mode) (let ((current-alt-text (pre-alt-text node))) (set-pre-alt-text current-alt-text) (add-pre-group-id (add-pre-alt-text (add-header-group-id node @@ -790,6 +818,7 @@ ((html-utils:tag= :pre-end node) node) ((html-utils:tag= :a node) + (unset-list-mode) (add-header-group-id node (current-header-group-id)))))) (add-source-id res source-line-id))) diff --git a/src/gemini/package.lisp b/src/gemini/package.lisp index de3d722..72fd690 100644 --- a/src/gemini/package.lisp +++ b/src/gemini/package.lisp @@ -98,8 +98,8 @@ :link-text :simple-line :text-line + :add-metadata-to-parsed-gemini-lines :sexp->text-rows - :sexp->text :parse-gemini-response-header :gemini-iri-p :gemini-first-h1 diff --git a/src/tests/gemini-parser-tests.lisp b/src/tests/gemini-parser-tests.lisp index 1953a2b..47925f5 100644 --- a/src/tests/gemini-parser-tests.lisp +++ b/src/tests/gemini-parser-tests.lisp @@ -18,3 +18,284 @@ (in-package :gemini-parser-tests) (defsuite gemini-parser-suite (all-suite)) + +(defparameter *gemini-stream* + "# header-1 +``` preformatted-1-alt +preformatted-line-1-1 +preformatted-line-1-2 +preformatted-line-1-3 +``` +# header-2 + +## header-3 + +* list-a +* list-b + +foo +``` preformatted-2-alt +preformatted-line-2-1 +preformatted-line-2-2 +preformatted-line-2-3 +``` + +bar +* list -c +baz + +> quote line +> quote line-2 +") + +(defun make-threads-parse (function) + (let* ((threads (loop repeat 10 + collect + (progn + (sleep .01) + (bt:make-thread function)))) + (results (loop for thread in threads + collect + (bt:join-thread thread)))) + (elt results (random (length results))))) + +(defun parse-stream () + (make-threads-parse (lambda () (gemini-parser:parse-gemini-file *gemini-stream* + :initialize-parser t)))) + +(deftest parse-test (gemini-parser-suite) + (assert-equality (lambda (a b) (tree-equal a b :test #'string=)) + '((:h1 + ((:source-line "# header-1 +")) + " header-1") + (:pre + ((:source-line "``` preformatted-1-alt +") + (:alt " preformatted-1-alt"))) + (:as-is + ((:source-line "preformatted-line-1-1 +") + (:alt " preformatted-1-alt")) + "preformatted-line-1-1") + (:as-is + ((:source-line "preformatted-line-1-2 +") + (:alt " preformatted-1-alt")) + "preformatted-line-1-2") + (:as-is + ((:source-line "preformatted-line-1-3 +") + (:alt " preformatted-1-alt")) + "preformatted-line-1-3") + (:pre-end + ((:source-line "``` +") + (:alt " preformatted-1-alt"))) + (:h1 + ((:source-line "# header-2 +")) + " header-2") + nil + (:h2 + ((:source-line "## header-3 +")) + " header-3") + nil + (:li + ((:source-line "* list-a +")) + "list-a") + (:li + ((:source-line "* list-b +")) + "list-b") + nil + (:text + ((:source-line "foo +")) + "foo") + (:pre + ((:source-line "``` preformatted-2-alt +") + (:alt " preformatted-2-alt"))) + (:as-is + ((:source-line "preformatted-line-2-1 +") + (:alt " preformatted-2-alt")) + "preformatted-line-2-1") + (:as-is + ((:source-line "preformatted-line-2-2 +") + (:alt " preformatted-2-alt")) + "preformatted-line-2-2") + (:as-is + ((:source-line "preformatted-line-2-3 +") + (:alt " preformatted-2-alt")) + "preformatted-line-2-3") + (:pre-end + ((:source-line "``` +") + (:alt " preformatted-2-alt"))) + nil + (:text + ((:source-line "bar +")) + "bar") + (:li + ((:source-line "* list -c +")) + "list -c") + (:text + ((:source-line "baz +")) + "baz") + nil + (:quote + ((:source-line "> quote line +")) + " quote line") + (:quote + ((:source-line "> quote line-2 +")) + " quote line-2")) + (parse-stream))) + +(defun annotate-stream () + (make-threads-parse (lambda () + (gemini-parser:with-initialized-parser + (gemini-parser:add-metadata-to-parsed-gemini-lines + (gemini-parser:parse-gemini-file *gemini-stream*)))))) + +(deftest annotate-test ((gemini-parser-suite) (parse-test)) + (assert-equality (lambda (a b) (tree-equal a b + :test (lambda (a b) + (if (numberp a) + (= a b) + (string= a b))))) + '((:h1 + ((:source-id 0) (:header-group-id 0) + (:source-line "# header-1 +")) + " header-1") + (:pre + ((:source-id 1) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt") + (:header-group-id 0) + (:source-line "``` preformatted-1-alt +") + (:alt " preformatted-1-alt"))) + (:as-is + ((:source-id 2) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt") + (:header-group-id 0) + (:source-line "preformatted-line-1-1 +") + (:alt " preformatted-1-alt")) + "preformatted-line-1-1") + (:as-is + ((:source-id 3) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt") + (:header-group-id 0) + (:source-line "preformatted-line-1-2 +") + (:alt " preformatted-1-alt")) + "preformatted-line-1-2") + (:as-is + ((:source-id 4) (:pre-group-id 0) (:pre-alt-text "preformatted-1-alt") + (:header-group-id 0) + (:source-line "preformatted-line-1-3 +") + (:alt " preformatted-1-alt")) + "preformatted-line-1-3") + (:pre-end + ((:source-id 5) + (:source-line "``` +") + (:alt " preformatted-1-alt"))) + (:h1 + ((:source-id 6) (:header-group-id 1) + (:source-line "# header-2 +")) + " header-2") + (:vertical-space ((:source-id 7))) + (:h2 + ((:source-id 8) (:header-group-id 2) + (:source-line "## header-3 +")) + " header-3") + (:vertical-space ((:source-id 9))) + (:li + ((:source-id 10) (:list-id 0) (:header-group-id 2) + (:source-line "* list-a +")) + "list-a") + (:li + ((:source-id 11) (:list-id 0) (:header-group-id 2) + (:source-line "* list-b +")) + "list-b") + (:vertical-space ((:source-id 12))) + (:text + ((:source-id 13) (:header-group-id 2) + (:source-line "foo +")) + "foo") + (:pre + ((:source-id 14) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt") + (:header-group-id 2) + (:source-line "``` preformatted-2-alt +") + (:alt " preformatted-2-alt"))) + (:as-is + ((:source-id 15) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt") + (:header-group-id 2) + (:source-line "preformatted-line-2-1 +") + (:alt " preformatted-2-alt")) + "preformatted-line-2-1") + (:as-is + ((:source-id 16) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt") + (:header-group-id 2) + (:source-line "preformatted-line-2-2 +") + (:alt " preformatted-2-alt")) + "preformatted-line-2-2") + (:as-is + ((:source-id 17) (:pre-group-id 1) (:pre-alt-text "preformatted-2-alt") + (:header-group-id 2) + (:source-line "preformatted-line-2-3 +") + (:alt " preformatted-2-alt")) + "preformatted-line-2-3") + (:pre-end + ((:source-id 18) + (:source-line "``` +") + (:alt " preformatted-2-alt"))) + (:vertical-space ((:source-id 19))) + (:text + ((:source-id 20) (:header-group-id 2) + (:source-line "bar +")) + "bar") + (:li + ((:source-id 21) (:list-id 1) (:header-group-id 2) + (:source-line "* list -c +")) + "list -c") + (:text + ((:source-id 22) (:header-group-id 2) + (:source-line "baz +")) + "baz") + (:vertical-space ((:source-id 23))) + (:quote + ((:source-id 24) (:header-group-id 2) + (:source-line "> quote line +")) + " quote line") + (:quote + ((:source-id 25) (:header-group-id 2) + (:source-line "> quote line-2 +")) + " quote line-2")) + (annotate-stream)))