1
0
mirror of https://codeberg.org/cage/tinmop/ synced 2025-02-02 04:36:43 +01:00

- added 'search-gemini-fragment'

This commit is contained in:
cage 2021-11-06 11:21:15 +01:00
parent b1b412ad89
commit 768e4a220b
6 changed files with 132 additions and 16 deletions

View File

@ -403,7 +403,12 @@
(source-line-id
:initform nil
:initarg :source-line-id
:accessor source-line-id)))
:accessor source-line-id)
(artificial
:initform nil
:initarg :artificial
:reader artificialp
:writer (setf artificial))))
(defmethod print-object ((object with-raw-text) stream)
(format stream "sid: ~a raw-text: ~a" (source-line-id object) (raw-text object)))
@ -490,11 +495,12 @@
:initarg :level
:accessor level)))
(defun make-header-line (text gid level)
(defun make-header-line (text gid level artificial)
(make-instance 'header-line
:lines (list text)
:group-id gid
:level level))
:lines (list text)
:group-id gid
:level level
:artificial artificial))
(defclass unordered-list-line (with-group-id with-lines with-raw-text) ())
@ -551,8 +557,8 @@
(make-header (level text underline-char)
(let ((underline (build-underline text underline-char))
(header-group-id (next-header-group-id)))
(list (make-header-line text header-group-id level)
(make-header-line underline header-group-id level))))
(list (make-header-line text header-group-id level nil)
(make-header-line underline header-group-id level t))))
(extract-source-line (node)
(html-utils:attribute-value (html-utils:find-attribute :source-line node)))
(trim (a)

View File

@ -71,6 +71,10 @@
:h3-prefix
:quote-prefix
:bullet-prefix
:raw-text
:source-line-id
:artificialp
:artificial
:with-lines
:with-pre-group-id
:pre-group-id

View File

@ -691,3 +691,92 @@
(refresh-config *message-window*)
(draw *message-window*)
*message-window*))
(defun original-source-metadata (row)
(let* ((original-line (row-get-original-object row))
(source-line (gemini-parser:raw-text original-line))
(source-line-id (gemini-parser:source-line-id original-line))
(artificialp (gemini-parser:artificialp original-line)))
(values source-line-id source-line artificialp)))
(defun search-gemini-fragment (window fragment)
(labels ((prefix-diff (text-rows source-line)
(- (length (first (split-words (first text-rows))))
(length (first (split-words source-line)))))
(reconstruct-source-lines (text-rows blanks &optional (accum '()))
(if (null text-rows)
(mapcar (lambda (a) (reduce #'strcat a)) accum)
(let ((text-rows-words (remove-if #'string-empty-p
(split-words (first text-rows)))))
(multiple-value-bind (reconstructed rest-blanks)
(reconstruct-source-line text-rows-words blanks)
(reconstruct-source-lines (rest text-rows)
rest-blanks
(append accum (list reconstructed)))))))
(quoted-line-p (row)
(typep (row-get-original-object row) 'gemini-parser:quoted-lines))
(strip-prefix-from-quoted-lines (text-rows prefix)
(append (list (first text-rows))
(mapcar (lambda (a) (regex-replace prefix a ""))
(rest text-rows))))
(rows->text-rows (rows)
(let ((text-rows (mapcar (lambda (a)
(tui-string->chars-string (normal-text a)))
rows)))
(if (quoted-line-p (first rows))
(let ((prefix (gemini-parser::prefix (row-get-original-object (first rows)))))
(strip-prefix-from-quoted-lines text-rows prefix))
text-rows)))
(reconstruct-source-line (words blanks)
(let ((rest-blanks '())
(res '()))
(loop for word in words
for blank in blanks
for ct from 1
do
(push word res)
(push blank res)
finally (setf rest-blanks (subseq blanks ct)))
(values (reverse res) rest-blanks))))
(with-accessors ((row-selected-index row-selected-index)) window
(let ((rest-rows (rows-safe-subseq window row-selected-index))
(matching-source-line nil)
(matching-source-id nil)
(matching-source-position nil)
(starting-match-row-pos nil))
(loop named sid-loop
for i from 0 below (length rest-rows)
for count-line-to-start-matching from 0
for current-row in rest-rows do
(multiple-value-bind (source-line-id source-line)
(original-source-metadata current-row)
(let* ((matching-string-pos (scan fragment source-line)))
(when matching-string-pos
(setf starting-match-row-pos count-line-to-start-matching)
(setf matching-source-line source-line)
(setf matching-source-id source-line-id)
(setf matching-source-position matching-string-pos)
(return-from sid-loop t)))))
(when matching-source-id
(let* ((matching-rows (remove-if-not (lambda (a)
(multiple-value-bind (source-line-id x artificialp)
(original-source-metadata a)
(declare (ignore x))
(and (not artificialp)
(= source-line-id matching-source-id))))
rest-rows))
(text-rows (rows->text-rows matching-rows))
(matching-source-blanks (extract-blanks matching-source-line))
(reconstructed-rows (reconstruct-source-lines text-rows matching-source-blanks))
(line-matched (loop
for reconstructed-row in reconstructed-rows
for line from 0
for length-accum = (length reconstructed-row)
then (+ length-accum
(length reconstructed-row))
while (<= (- length-accum
(prefix-diff text-rows matching-source-line))
matching-source-position)
finally (return line))))
(row-move window (+ starting-match-row-pos line-matched))
(draw window)))))))

View File

@ -371,6 +371,7 @@
:join-with-strings
:join-with-strings*
:split-words
:extract-blanks
:split-lines
:strip-prefix
:strip-withespaces

View File

@ -47,10 +47,11 @@
:test #'string=)))
(deftest match-words (text-utils-suite)
(assert-true (match-words '("a" "b" "c") '("a" "b" "c")))
(assert-true (match-words '("a" "b" "c" "d") '("a" "b" "c")))
(assert-true (match-words '("a" "foo" "bar" "d") '("foo" "bar")))
(assert-true (match-words '("a" "b" "c" "d") '("c" "d")))
(assert-false (match-words '("a" "b" "c" "d") '("b" "a")))
(assert-false (match-words '("a" "b" "c" "d") '("a" "b" "x")))
(assert-false (match-words '("a" "b" "c" "d") '("a" "b" "c" "d" "e"))))
(let ((clunit:*clunit-equality-test* #'=))
(assert-equality* 0 (match-words '("a" "b" "c") '("a" "b" "c")))
(assert-equality* 0 (match-words '("a" "b" "c" "d") '("a" "b" "c")))
(assert-equality* 1 (match-words '("a" "foo" "bar" "d") '("foo" "bar")))
(assert-equality* 2 (match-words '("a" "b" "c" "d") '("c" "d")))
(assert-false (match-words '("a" "b" "c" "d") '("b" "a")))
(assert-false (match-words '("a" "b" "c" "d") '("a" "b" "x")))
(assert-false (match-words '("a" "b" "c" "d") '("a" "b" "c" "d" "e")))))

View File

@ -161,6 +161,9 @@
(defun split-words (text)
(cl-ppcre:split "\\p{White_Space}" text))
(defun extract-blanks (text)
(remove-if #'string-empty-p (cl-ppcre:split "\\P{White_Space}" text)))
(defun split-lines (text)
(let ((res ()))
(flex:with-input-from-sequence (stream (babel:string-to-octets text))
@ -761,10 +764,22 @@ printed in the box column by column; in the example above the results are:
(remove-if #'display-corrupting-utf8-p object))
(defun match-words (words probe &optional (test #'string=))
"Returns the starting position of list of strings `probe' in the
list of string `words' or nil if probe is not in words under predicate
`test'."
(loop for start-words = words then (rest start-words)
for word-start-count from 0
while start-words do
(if (<= (length probe)
(length start-words))
(let ((slice (subseq start-words 0 (length probe))))
(tree-equal slice probe :test test))
(let* ((slice (subseq start-words 0 (length probe)))
(mismatchp (loop named mismatch-loop
for i in slice
for j in probe
do
(if (not (funcall test i j))
(return-from mismatch-loop t)
nil))))
(when (not mismatchp)
(return-from match-words word-start-count)))
(return-from match-words nil))))