1
0
mirror of https://codeberg.org/cage/tinmop/ synced 2025-01-12 02:13:58 +01:00
tinmop/data/scripts/generate-gemlog.lisp

526 lines
22 KiB
Common Lisp

;; tinmop module for utility move command in thread window
;; Copyright © 2022 cage
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;; this script generates a gemlog starting from an archive directory
;; containing posts in gemtext format
;; this script expect a bunch of input data to fill the gemlog templates.
;; Such data can be provided by the environment variables given below:
;; - GEMLOG_HEADER
;; The header of the gemlog
;; - GEMLOG_TOPIC_INDEX_HEADER
;; The header of the topic index page
;; - GEMLOG_URI_PATH_PREFIX
;; the root of the path for the gemlog (e.g: "gemini://example/cage/" → "/cage/"
;; - GEMLOG_URI_POST_HOME_BACKLINK
;; The URL added to the footer of each post
;; - GEMLOG_URI_POST_HOME_BACKLINK_NAME
;; The label for the URL added to the footer of each post
;; - GEMLOG_URI_INDICES_HOME_BACKLINK
;; The URL added to the footer of the gemlog and topic indices
;; - GEMLOG_URI_HOME_INDICES_BACKLINK_NAME
;; The label for the URL added to the footer of the gemlog and topic indices
;; - GEMLOG_POSTS_SOURCE
;; The source directory where the posts can be found to be processed by this script
;; - GEMLOG_POSTS_DESTINATION
;; the destination directory where the processed posts and generated indices are placed
;; - GEMLOG_ANTENNA_URL
;; the url of the antenna instance
;; - GEMLOG_ANTENNA_TARGET_URL
;; the url of the target url for antenna; that is, the url of your gemlog
;; METADATA
;; The source posts must contains metadata
;; the metadata must be wrapped in a preformatted block and the block's
;; alt-text must be exactly the string "META". The metadata must be
;; added one for line and contains a, colon separated, pair each.
;; So far the only valid metadata lines are:
;; - date: (the date of the post, format: YYY-MM-DD)
;; - topics: The post's' topics: a comma separated list of strings
;; For example
;; […]
;; ``` META
;; date: 2023-08-14
;; topic: comics, cinema
;; ```
(defpackage :generate-gemlog
(:use :cl
:misc
:fs
:html-utils
:gemini-parser
:text-utils)
(:local-nicknames (:a :alexandria)))
(in-package :generate-gemlog)
(a:define-constant +meta-node-attribute+ "META" :test #'string=)
(a:define-constant +meta-topic-key+ :topic :test #'eq)
(a:define-constant +meta-topic-values-delimiters+ "," :test #'string=)
(a:define-constant +meta-date-key+ :date :test #'eq)
(a:define-constant +meta-key-value-delimiter+ ": *" :test #'string=)
(a:define-constant +archive-dir+ "archive" :test #'string=)
(a:define-constant +archive-gemlog-file+ "gemlog.gmi" :test #'string=)
(a:define-constant +archive-topic-file+ "topics.gmi" :test #'string=)
(a:define-constant +post-valid-file-extension+ "gmi$" :test #'string=)
(a:define-constant +environment-variable-gemlog-header+
"GEMLOG_HEADER" :test #'string=)
(a:define-constant +environment-variable-gemlog-topic-index-header+
"GEMLOG_TOPIC_INDEX_HEADER" :test #'string=)
(a:define-constant +environment-variable-gemlog-uri-path-prefix+
"GEMLOG_URI_PATH_PREFIX" :test #'string=)
(a:define-constant +environment-variable-gemlog-post-home-backlink+
"GEMLOG_URI_POST_HOME_BACKLINK" :test #'string=)
(a:define-constant +environment-variable-gemlog-post-home-backlink-name+
"GEMLOG_URI_POST_HOME_BACKLINK_NAME" :test #'string=)
(a:define-constant +environment-variable-gemlog-indices-home-backlink+
"GEMLOG_URI_INDICES_HOME_BACKLINK" :test #'string=)
(a:define-constant +environment-variable-gemlog-indices-home-backlink-name+
"GEMLOG_URI_HOME_INDICES_BACKLINK_NAME" :test #'string=)
(a:define-constant +environment-variable-gemlog-posts-archive+
"GEMLOG_POSTS_SOURCE" :test #'string=)
(a:define-constant +environment-variable-gemlog-posts-destination+
"GEMLOG_POSTS_DESTINATION" :test #'string=)
(a:define-constant +environment-variable-antenna-url+
"GEMLOG_ANTENNA_URL" :test #'string=)
(a:define-constant +environment-variable-antenna-target-url+
"GEMLOG_ANTENNA_TARGET_URL" :test #'string=)
(defun getenv (key default)
(os-utils:getenv key :default default))
(defparameter *gemlog-header*
(getenv +environment-variable-gemlog-header+
(format nil "# Posts~2%")))
(defparameter *topic-index-header*
(getenv +environment-variable-gemlog-topic-index-header+
(format nil "# Topics archive~2%")))
(defparameter *uri-path-prefix*
(getenv +environment-variable-gemlog-uri-path-prefix+ nil))
(defparameter *post-home-backlink*
(getenv +environment-variable-gemlog-post-home-backlink+ "../index.gmi"))
(defparameter *post-home-backlink-name*
(getenv +environment-variable-gemlog-post-home-backlink-name+ "home"))
(defparameter *indices-home-backlink*
(getenv +environment-variable-gemlog-indices-home-backlink+ "./index.gmi"))
(defparameter *indices-home-backlink-name*
(getenv +environment-variable-gemlog-indices-home-backlink-name+ "home"))
(defun parse-date (timestring)
(local-time:parse-timestring timestring))
(defun format-date-to-string (date)
(with-output-to-string (stream)
(local-time:format-timestring stream date
:format '((:year 2) "-" (:month 2) "-" (:day 2)))))
(defun meta-node-p (node)
(a:when-let ((attribute (find-attribute :alt node)))
(string= (trim-blanks (attribute-value attribute))
"META")))
(defun extract-meta (parsed)
(let ((lines (mapcar (lambda (node)
(trim-blanks (first (children node))))
(remove-if-not #'meta-node-p parsed))))
(loop for line in (rest lines)
when line
collect
(let* ((key-value (mapcar #'trim-blanks
(cl-ppcre:split +meta-key-value-delimiter+ line)))
(key (a:make-keyword (string-upcase (first key-value))))
(raw-value (second key-value)))
(cons key
(ecase key
(:topic
(list (mapcar #'trim-blanks
(cl-ppcre:split +meta-topic-values-delimiters+
raw-value))))
(:date
(parse-date raw-value))))))))
(defun extract-non-meta (parsed)
(remove-if #'meta-node-p parsed))
(defclass post ()
((meta
:initform nil
:initarg :meta
:accessor meta)
(content
:initform nil
:initarg :content
:accessor content)
(original-file-path
:initform nil
:initarg :original-file-path
:accessor original-file-path)
(archive-file-path
:initform nil
:initarg :archive-file-path
:accessor archive-file-path)))
(defmethod print-object ((object post) stream)
(with-accessors ((meta meta)
(content content)
(original-file-path original-file-path)) object
(print-unreadable-object (object stream :type t)
(format stream
"~a ~a ~a~%"
meta
content
original-file-path))))
(defun notify (control &rest args)
(if (ui:tui-active-p)
(ui:info-message (apply #'format nil control args))
(apply #'format t control args)))
(defun bulk->posts (capsule-bulk-dir)
(let* ((original-post-files (remove-if-not (lambda (a)
(and (fs:regular-file-p a)
(cl-ppcre:scan +post-valid-file-extension+ a)))
(fs:collect-children capsule-bulk-dir)))
(parsed-posts (mapcar (lambda (a)
(handler-case
(with-initialized-parser
(parse-gemini-file (fs:slurp-file a)))
(error (e)
(notify
"Unable to parse ~a: ~a~%"
a e)
nil)))
original-post-files))
(all-meta (mapcar (lambda (a file)
(handler-case
(let ((meta (extract-meta a)))
(if meta
meta
(progn
(notify "Unable to find metadata for ~a~%"
file)
nil)))
(error (e)
(notify
"Unable to parse metadata ~a: ~a~%"
file e)
nil)))
parsed-posts
original-post-files)))
(loop for original-post-file in original-post-files
for parsed-post in parsed-posts
for meta in all-meta
when (and parsed-post meta)
collect
(make-instance 'post
:original-file-path original-post-file
:content (extract-non-meta parsed-post)
:meta meta))))
(defun post-topics (post)
(with-accessors ((meta meta)) post
(cadr (assoc +meta-topic-key+ meta))))
(defun collect-topics (posts)
(let ((results '()))
(loop for post in posts do
(with-accessors ((meta meta)) post
(a:when-let ((topics (post-topics post)))
(loop for topic in topics do
(pushnew topic results :test #'string-equal)))))
(sort results #'string<)))
(defun post-date (post)
(with-accessors ((meta meta)) post
(cdr (assoc +meta-date-key+ meta))))
(defun sexp->gmi (parsed stream)
(loop for node in parsed do
(let* ((child (first (children node)))
(line (cond
((tag= :h1 node)
(geminize-h1 child))
((tag= :h2 node)
(geminize-h2 child))
((tag= :h3 node)
(geminize-h3 child))
((tag= :li node)
(geminize-list child))
((tag= :quote node)
(geminize-quote child))
((tag= :a node)
(geminize-link (strcat " "
(attribute-value (find-attribute :href node))
" "
child)))
((or (tag= :pre node)
(tag= :pre-end node))
"```")
((null (first (children node)))
(format nil "~%"))
(t
(first (children node))))))
(format stream "~a~%" line))))
(defun create-archive (bulk-posts-dir output-directory)
(let* ((posts (sort (bulk->posts bulk-posts-dir)
(lambda (a b)
(local-time:timestamp> (post-date a)
(post-date b)))))
(all-topics (collect-topics posts))
(archive-dir-path (strcat output-directory "/" +archive-dir+)))
(fs:make-directory archive-dir-path)
(loop for post in posts do
(let* ((file-name (fs:strip-dirs-from-path (original-file-path post)))
(file-path (fs:cat-parent-dir archive-dir-path file-name)))
(setf (archive-file-path post) file-path)
(handler-case
(with-open-file (stream file-path
:direction :output
:if-does-not-exist :create
:if-exists :supersede)
(sexp->gmi (content post) stream)
(write-sequence (geminize-preformatted (format nil "~{#~a~^ ~}" (post-topics post)))
stream)
(write-sequence (geminize-link (strcat " "
*post-home-backlink*
" "
*post-home-backlink-name*))
stream)
(write-char #\Newline stream)
(notify "Processed ~a~%" (original-file-path post)))
(error (e)
(format *error-output*
"skipping ~a, reasons: ~a.~%"
file-path
e)))))
(values posts all-topics)))
(defun write-links (posts stream)
(flet ((clean-link-filename (filename)
(let* ((basename (cl-ppcre:regex-replace "\\.gmi$" filename ""))
(words (cl-ppcre:split "-" basename))
(capitalized-words (loop for ct from 0
for word in words
collect
(if (or (= ct 0)
(> (length word) 3))
(let ((first-char (string-capitalize (elt word 0))))
(text-utils:strcat first-char
(subseq word 1)))
word))))
(text-utils:join-with-strings capitalized-words " "))))
(loop for post in posts do
(let* ((filename (strip-dirs-from-path (archive-file-path post)))
(relative-archive-path (strcat *uri-path-prefix*
(cat-parent-dir +archive-dir+
(percent-encode filename))))
(link-text (strcat (format-date-to-string (post-date post))
" "
(clean-link-filename filename)))
(link (geminize-link (format nil
" ~a ~a~%"
relative-archive-path
link-text))))
(write-sequence link stream)))))
(defun make-gemlog-index (all-posts output-directory)
(let ((gemlog-index-path (cat-parent-dir output-directory +archive-gemlog-file+)))
(with-open-file (gemlog-stream
gemlog-index-path
:direction :output
:if-does-not-exist :create
:if-exists :supersede)
(format gemlog-stream *gemlog-header*)
(write-char #\Newline gemlog-stream)
(write-links all-posts gemlog-stream)
(format gemlog-stream "~%")
(write-sequence (geminize-link (strcat " "
*indices-home-backlink*
" "
*indices-home-backlink-name*))
gemlog-stream)
(write-char #\Newline gemlog-stream))))
(defun make-topic-index (all-posts output-directory all-topics)
(let ((topics-index-path (cat-parent-dir output-directory +archive-topic-file+)))
(with-open-file (stream
topics-index-path
:direction :output
:if-does-not-exist :create
:if-exists :supersede)
(write-sequence *topic-index-header* stream)
(write-char #\Newline stream)
(loop for topic in all-topics do
(format stream "~a~2%" (geminize-h2 topic))
(let ((in-topic-posts (remove-if-not (lambda (post)
(let ((post-topics (post-topics post)))
(find topic
post-topics
:test #'string-equal)))
all-posts)))
(write-links in-topic-posts stream)
(format stream "~%")))
(write-sequence (geminize-link (strcat " "
*indices-home-backlink*
" "
*indices-home-backlink-name*))
stream)
(write-char #\Newline stream))))
(defun generate-antenna-submit-url ()
(multiple-value-bind (actual-iri host path query port fragment)
(gemini-client:displace-iri (iri-parser:iri-parse (getenv +environment-variable-antenna-url+
nil)))
(declare (ignore actual-iri fragment query))
(iri-parser:make-iri gemini-constants:+gemini-scheme+
nil
host
port
path
(getenv +environment-variable-antenna-target-url+ nil))))
(defun generate-gemlog (bulk-posts-dir output-directory)
(multiple-value-bind (all-posts all-topics)
(create-archive bulk-posts-dir output-directory)
(make-topic-index all-posts output-directory all-topics)
(make-gemlog-index all-posts output-directory)))
(a:define-constant +bulk-dir-prompt+ "Original posts directory? " :test #'string=)
(a:define-constant +output-dir-prompt+ "Output directory? " :test #'string=)
(a:define-constant +root-dir-prompt+ "Type the root of the path for the gemlog (e.g: \"gemini://example/cage/\" → \"/cage/\") " :test #'string=)
(defun generate-on-tui ()
(let ((bulk-posts-dir (getenv +environment-variable-gemlog-posts-archive+ nil))
(output-directory (getenv +environment-variable-gemlog-posts-destination+ nil)))
(format t "Starting processing~%")
(labels ((ask-output-directory ()
(ui:ask-string-input #'on-out-complete
:prompt +output-dir-prompt+
:complete-fn #'complete:directory-complete))
(ask-path-prefix ()
(ui:ask-string-input #'on-root-completed
:prompt +root-dir-prompt+))
(generate ()
(generate-gemlog bulk-posts-dir output-directory)
(notify "Gemlog generated~%")
(submit-antenna))
(submit-antenna ()
(when (and (getenv +environment-variable-antenna-target-url+ nil)
(getenv +environment-variable-antenna-url+ nil))
(ui:ask-string-input #'on-submit-to-antenna
:prompt (format nil
"Antenna submit URL: ~a, submit? [y/N]"
(to-s (generate-antenna-submit-url))))))
(on-submit-to-antenna (maybe-accepted)
(ui-goodies::with-valid-yes-at-prompt (maybe-accepted y-pressed-p)
(when y-pressed-p
(gemini-client:slurp-gemini-url (to-s (generate-antenna-submit-url))))
(ui:open-gemini-address output-directory)))
(on-bulk-complete (input-text)
(ui::with-enqueued-process ()
(setf bulk-posts-dir input-text)
(cond
((null output-directory)
(ask-output-directory))
((null *uri-path-prefix*)
(ui::with-enqueued-process ()
(ask-path-prefix)))
(t
(generate)))))
(on-out-complete (out-directory)
(ui::with-enqueued-process ()
(setf output-directory out-directory)
(if (null *uri-path-prefix*)
(ask-path-prefix)
(generate))))
(on-root-completed (root)
(ui::with-enqueued-process ()
(setf *uri-path-prefix* root)
(generate))))
(cond
((null bulk-posts-dir)
(ui:ask-string-input #'on-bulk-complete
:prompt +bulk-dir-prompt+
:complete-fn #'complete:directory-complete))
((null output-directory)
(ui::with-enqueued-process ()
(ask-output-directory)))
((null *uri-path-prefix*)
(ui::with-enqueued-process ()
(ask-path-prefix)))
(t
(generate-gemlog bulk-posts-dir output-directory)
(notify "Gemlog generated~%")
(ui:open-gemini-address output-directory))))))
(defun ask-input-cli (prompt)
(notify "~a~%" prompt)
(finish-output)
(read-line))
(if (ui:tui-active-p)
(generate-on-tui)
(let* ((bulk-posts-dir (or (getenv +environment-variable-gemlog-posts-archive+ nil)
(ask-input-cli +bulk-dir-prompt+)))
(output-directory (or (getenv +environment-variable-gemlog-posts-destination+ nil)
(ask-input-cli +output-dir-prompt+))))
(when (null *uri-path-prefix*)
(setf *uri-path-prefix* (ask-input-cli +root-dir-prompt+)))
(generate-gemlog bulk-posts-dir output-directory)
(when (and (getenv +environment-variable-antenna-target-url+ nil)
(getenv +environment-variable-antenna-url+ nil))
(format t "Antenna submit URL:~%~a~%" (to-s (generate-antenna-submit-url)))
(let ((submit (ask-input-cli "Submit the gemlog to antenna? [y/N]")))
(when (string= submit "y")
(gemini-client:slurp-gemini-url (to-s (generate-antenna-submit-url))))))))