Complete base for HTML Journal generation

2025-06-05 22:09:23 +02:00 · 2022-11-17 13:03:17 +01:00
parent 7a098b55eb
commit 120d0bff11
10 changed files with 73 additions and 46 deletions
--- a/Assets/COPYING.md
+++ b/Assets/COPYING.md
@@ -0,0 +1,7 @@
+- **Feed-88x31.png**:  
+OctoSpacc, CC BY-SA 4.0 <https://creativecommons.org/licenses/by-sa/4.0>, from <https://sitoctt.octt.eu.org>  
+Used to represent links to syndication feeds in the generated site.
+
+- **Valid-HTML-Journal-88x31.png**:  
+m15o, Unknown License, from <https://journal.miso.town>  
+Used to represent conformity to the HTML Journal standard in the generated journal pages; Fair use.
--- a/Assets/Feed-88x31.png
+++ b/Assets/Feed-88x31.png
--- a/Assets/Valid-HTML-Journal-88x31.png
+++ b/Assets/Valid-HTML-Journal-88x31.png
--- a/Locale/en.json
+++ b/Locale/en.json
@@ -6,5 +6,6 @@
 	"Comments": "Comments",
 	"OpenInNewTab": "Open in a new tab",
 	"ClickHere": "Click here",
-	"IfNotRedirected": "if you aren't automatically redirected"
+	"IfNotRedirected": "if you aren't automatically redirected",
+	"StrippedDownNotice": "This page has been automatically stripped-down from the original version, <a href=\"{Link}\">available here</a>. Consider visiting that for a better experience."
 }
--- a/Locale/it.json
+++ b/Locale/it.json
@@ -6,5 +6,6 @@
 	"Comments": "Commenti",
 	"OpenInNewTab": "Apri in una nuova scheda",
 	"ClickHere": "Clicca qui",
-	"IfNotRedirected": "se non subisci il reindirizzamento automatico"
+	"IfNotRedirected": "se non subisci il reindirizzamento automatico",
+	"StrippedDownNotice": "Questa pagina è stata automaticamente semplificata dalla versione originale, <a href=\"{Link}\">disponibile qui</a>. Considera di consultare quella per un'esperienza migliore."
 }
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ Needed for Gemtext output support:

 ## Features roadmap

+- [x] Generation of simplified pages compliant with the [HTML Journal standard](https://journal.miso.town)
 - [x] HTML feeds (pages with list of N most recent posts)
 - [x] Lists of all pages in a site directory
 - [x] Page redirects / Alt URLs (+ ActivityPub URL overrides) 
@@ -61,7 +62,7 @@ Needed for Gemtext output support:
 - [ ] Polished Gemtext generation
 - [x] Autodetection of pages and posts
 - [x] Info for posts shown on their page
- [x] HTML and CSS minification
+- [x] HTML and CSS minification for Pages and Assets
 - [x] Full Open Graph support
 - [x] Custom categories for posts
 - [x] Custom static and dynamic page parts
--- a/Source/Modules/Elements.py
+++ b/Source/Modules/Elements.py
@@ -7,15 +7,16 @@
 |   Copyright (C) 2022, OctoSpacc     |
 | ================================= """

+from base64 import b64encode
 from Modules.HTML import *
 from Modules.Utils import *

 JournalHeadings = ('h2','h3','h4','h5')
 JournalTitleDecorators = {'(':')', '[':']', '{':'}'}
-JournalStyles = {
-	"Default": {},
-	"details": {}
-}
+#JournalStyles = {
+#	"Default": {},
+#	"details": {}
+#}
 HTMLSectionTitleLine = '<h{Index} class="SectionHeading staticoso-SectionHeading"><span class="SectionLink staticoso-SectionLink"><a href="#{DashTitle}"><span>»</span></a> </span><span class="SectionTitle staticoso-SectionTitle" id="{DashTitle}">{Title}</span></h{Index}>'
 PugSectionTitleLine = "{Start}{Heading}.SectionHeading.staticoso-SectionHeading #[span.SectionLink.staticoso-SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle.staticoso-SectionTitle {Rest}]"
 CategoryPageTemplate = """\
@@ -127,44 +128,52 @@ def FormatTitles(Titles, Flatten=False):
 # Clean up a generic HTML tree such that it's compliant with the HTML Journal standard
 # (https://m15o.ichi.city/site/subscribing-to-a-journal-page.html);
 # basis is: find an element with the JournalBody attr., and group its direct children as <article>s
-def MakeHTMLJournal(HTML):
+def MakeHTMLJournal(Flags, Locale, FilePath, HTML):
 	Soup, Journal, Entries = MkSoup(HTML), '', []
-	#for t in Soup.find_all(attrs={"journalbody":True}):
 	for t in Soup.find_all(attrs={"htmljournal":True}):
-		JournalStyle = JournalStyles[t.attrs["journalstyle"]] if 'journalstyle' in t.attrs and t.attrs["journalstyle"] in JournalStyles else JournalStyles['Default']
-		#if 'journalbody' in t.attrs: # Journal container
+		#JournalStyle = JournalStyles[t.attrs["journalstyle"]] if 'journalstyle' in t.attrs and t.attrs["journalstyle"] in JournalStyles else JournalStyles['Default']
 		for c in t.children: # Entries, some might be entirely grouped in their own element but others could not, use headings as separators
-			#print(123,str(c).strip('\n'))
 			for ct in MkSoup(str(c)).find_all():
 				# Transform (almost, for now I reserve some) any heading into h2 and remove any attributes
 				if ct.name in JournalHeadings:
 					Title = ct.text.strip().removeprefix('»').strip()
 					Chr0 = Title[0]
-					# Remove leading symbols b
+					# Remove leading symbols before date
 					if Chr0 in JournalTitleDecorators.keys():
 						Idx = Title.find(JournalTitleDecorators[Chr0])
 						Title = Title[1:Idx] + ' - ' + Title[Idx+2:]
-					#print(Title)
 					if Journal:
-						Journal += '\n</article>\n'
+						Journal += '\n</article><br>\n'
 					Journal += f'\n<article>\n<h2>{Title}</h2>\n'
 				elif ct.name == 'p': # We should handle any type to preserve <details> and things
-					#print(ct.name)
 					Journal += str(ct)
-		#Journal += '\n</article>\n'
-		#t.replace_with(Journal)
-		#HTML = HTML.replace(str(t), Journal) # Have to do this crap, bs4's replace_with doesn't wanna work
-		#print(t)
-		#print(Journal)
-		t.attrs["journalheader"] if "journalheader" in t.attrs else ""
-		Title = t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal"
-		# <a href=""><img width="88" height="31" src="https://journal.miso.town/static/banner-htmlj.png"></a>
-		Journal = f'''\
-<h1>{t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal"}</h1>
-{t.attrs["journalheader"] if "journalheader" in t.attrs else ""}
-{Journal}
-</article>
-{t.attrs["journalfooter"] if "journalfooter" in t.attrs else ""}
-'''
-	# Instead of copying stuff from the full page, we use dedicated title, header, and footer
+		FileName = FilePath.split('/')[-1]
+		URL = f'{Flags["SiteDomain"]}/{StripExt(FilePath)}.Journal.html'
+		# Instead of copying stuff from the full page, for now we use dedicated title, header, footer, and pagination
+		Title = t.attrs["journaltitle"] if 'journaltitle' in t.attrs else f'"{StripExt(FileName)}" Journal - {Flags["SiteName"]}' if Flags["SiteName"] else f'"{StripExt(FileName)}" Journal'
+		FeedLink = f"""<a title="Journal Atom Feed" href="https://journal.miso.town/atom?url={URL}" target="_blank" rel="noopener"><img width="88" height="31" alt="Journal Atom Feed" title="Journal Atom Feed" src="data:image/png;base64,{b64encode(ReadFile(staticosoBaseDir()+'Assets/Feed-88x31.png', 'rb')).decode()}"></a>""" if Flags["SiteDomain"] else ''
+		Header = t.attrs["journalheader"] if 'journalheader' in t.attrs else f"""\
+<p>
+<i>{Locale["StrippedDownNotice"].format(Link="./"+FileName)}</i>
+<a title="Valid HTML Journal" href="https://journal.miso.town" target="_blank" rel="noopener"><img alt="Valid HTML Journal" title="Valid HTML Journal" width="88" height="31" src="data:image/png;base64,{b64encode(ReadFile(staticosoBaseDir()+'Assets/Valid-HTML-Journal-88x31.png', 'rb')).decode()}"></a>
+{FeedLink}
+</p>
+"""
+		Journal = f"""\
+<!DOCTYPE html>
+<html>
+<head>
+	<title>{Title}</title>
+	<link rel="canonical" href="{URL}">
+	<meta http-equiv="refresh" content="0; url='./{FileName}'">
+</head>
+<body>
+	<h1>{Title}</h1>
+	{Header}<br>
+	{Journal}
+	</article><br>
+	{t.attrs["journalfooter"] if "journalfooter" in t.attrs else ""}
+</body>
+</html>
+"""
 	return Journal
--- a/Source/Modules/Site.py
+++ b/Source/Modules/Site.py
@@ -515,7 +515,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
 		if not LightRun:
 			HTML = DoMinifyHTML(HTML, MinifyKeepComments)
 		ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments)
-	if Flags['NoScripts'] and ("<script" in ContentHTML.lower() or "<script" in HTML.lower()):
+	if Flags['NoScripts'] and ('<script' in ContentHTML.lower() or '<script' in HTML.lower()):
 		if not LightRun:
 			HTML = StripTags(HTML, ['script'])
 		ContentHTML = StripTags(ContentHTML, ['script'])
@@ -528,7 +528,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
 			HTML = DoHTMLFixPre(HTML)
 		ContentHTML = DoHTMLFixPre(ContentHTML)
 	if not LightRun and 'htmljournal' in ContentHTML.lower(): # Avoid extra cycles
-		WriteFile(StripExt(PagePath)+'.journal.html', MakeHTMLJournal(ContentHTML))
+		WriteFile(StripExt(PagePath)+'.Journal.html', MakeHTMLJournal(Flags, Locale, f'{StripExt(File)}.html', ContentHTML))

 	if LightRun:
 		SlimHTML = None
--- a/Source/Modules/Utils.py
+++ b/Source/Modules/Utils.py
@@ -1,3 +1,4 @@
+
 """ ================================= |
 | This file is part of                |
 |   staticoso                         |
@@ -12,7 +13,7 @@ import os
 from datetime import datetime
 from pathlib import Path

-ReservedPaths = ('Site.ini', 'Assets', 'Resources', 'Pages', 'Posts', 'Templates', 'StaticParts', 'DynamicParts')
+ReservedPaths = ('Site.ini', 'Assets', 'Pages', 'Posts', 'Templates', 'StaticParts', 'DynamicParts')
 FileExtensions = {
 	'Pages': ('htm', 'html', 'markdown', 'md', 'pug', 'txt'),
 	'HTML': ('.htm', '.html'),
@@ -20,23 +21,26 @@ FileExtensions = {
 	'Tmp': ('htm', 'markdown', 'md', 'pug', 'txt')}

 def SureList(e):
-    return e if type(e) == list else [e]
+	return e if type(e) == list else [e]

-def ReadFile(p):
+# Get base directory path of the staticoso program
+def staticosoBaseDir():
+	return f"{os.path.dirname(os.path.abspath(__file__))}/../../"
+
+def ReadFile(p, m='r'):
 	try:
-		with open(p, 'r') as f:
+		with open(p, m) as f:
 			return f.read()
 	except Exception:
-		print(f"[E] Error reading file {p}")
+		logging.error(f"Error reading file {p}")
 		return None

-def WriteFile(p, c):
+def WriteFile(p, c, m='w'):
 	try:
-		with open(p, 'w') as f:
-			f.write(c)
-		return True
+		with open(p, m) as f:
+			return f.write(c)
 	except Exception:
-		print(f"[E] Error writing file {p}")
+		logging.error(f"[E] Error writing file {p}")
 		return False

 def FileToStr(File, Truncate=''):
@@ -153,7 +157,7 @@ def GetFullDate(Date):

 def LoadLocale(Lang):
 	Lang = Lang + '.json'
-	Folder = os.path.dirname(os.path.abspath(__file__)) + '/../../Locale/'
+	Folder = f'{staticosoBaseDir()}Locale/'
 	File = ReadFile(Folder + Lang)
 	if File:
 		return json.loads(File)
--- a/6
+++ b/6
@@ -1,3 +1,7 @@
+- Pages transclusion + probably drop StaticParts (would be redundant)
+- User macros with arguments
+- Specifying language for single pages, with the option applying to the locale used for templating
+- Apply HTML templating to Journal pages (requires a template that won't conflict, aka testing is needed)
 - Internal macro substitutions have to be made until there's nothing to replace
 - Release on pip
 - Alert for deprecated features
@@ -20,7 +24,7 @@
 - Show page size/words/time in meta line
 - Add feed support for diary-like pages
 - Fix excess whitespace in some section/menu titles
- Change all staticoso service tag enclosures from [] to <>
+- Support both [] and <> as internal macro enclosures on equal footing, and streamline the code for that
 - Investigate a strange bug with Macros
 - Handle file extensions with any case sensitivity, not just lowercase; currently the bulk of the issue is finding the files on disk
 - Test sorting by date for files not starting with date, and dated folders