Base work for HTML Journal standard conversion

2022-11-16 13:07:27 +01:00 · 2022-11-16 13:07:27 +01:00 · 7a098b55eb
parent 84eb64ff67
commit 7a098b55eb
3 changed files with 60 additions and 5 deletions
--- a/Source/Modules/Elements.py
+++ b/Source/Modules/Elements.py
@ -10,6 +10,12 @@
 from Modules.HTML import *
 from Modules.Utils import *

+# Heading tags that MakeHTMLJournal treats as the start of a journal entry (h1 and h6 are not included)
+JournalHeadings = ('h2','h3','h4','h5')
+# Opening -> closing bracket pairs; a title starting with one of the keys has that bracketed part unwrapped
+JournalTitleDecorators = {'(':')', '[':']', '{':'}'}
+# Option tables looked up by the "journalstyle" attribute of a journal container,
+# with "Default" as the fallback; both tables are empty for now
+JournalStyles = {
+	"Default": {},
+	"details": {}
+}
 HTMLSectionTitleLine = '<h{Index} class="SectionHeading staticoso-SectionHeading"><span class="SectionLink staticoso-SectionLink"><a href="#{DashTitle}"><span>»</span></a> </span><span class="SectionTitle staticoso-SectionTitle" id="{DashTitle}">{Title}</span></h{Index}>'
 PugSectionTitleLine = "{Start}{Heading}.SectionHeading.staticoso-SectionHeading #[span.SectionLink.staticoso-SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle.staticoso-SectionTitle {Rest}]"
 CategoryPageTemplate = """\
@ -102,7 +108,7 @@ def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix='')
 		Title = f'<a href="{Href}">{Title}</a>'
 	if Meta['Type'] == 'Post':
 		CreatedOn = Meta['CreatedOn'] if Meta['CreatedOn'] else '?'
-		Title = f"[{CreatedOn}] {Title}"
+		Title = f"[<time>{CreatedOn}</time>] {Title}"
 	return Title

 def FormatTitles(Titles, Flatten=False):
@ -117,3 +123,48 @@ def FormatTitles(Titles, Flatten=False):
 		End = '</li></ul>' * (n - 1)
 		HTMLTitles += f'<li>{Start}<a href="#{DashyTitle}">{html.escape(Title)}</a>{End}</li>'
 	return f'<ul>{HTMLTitles}</ul>'
+
+# Convert a generic HTML tree into a document compliant with the HTML Journal standard
+# (https://m15o.ichi.city/site/subscribing-to-a-journal-page.html).
+# Basis: find each element carrying the htmljournal attribute, and group the content of its children into <article>s
+def MakeHTMLJournal(HTML):
+	"""Build an HTML Journal document out of the page markup in HTML.
+
+	Every element carrying an "htmljournal" attribute is a journal container.
+	Each heading listed in JournalHeadings found under its children starts a new
+	<article> whose first element is a plain <h2> with the normalised title;
+	<p> elements are copied over unchanged, anything else is dropped for now.
+	The <h1> title, the header and the footer are not copied from the page: they
+	come from the container's "journaltitle", "journalheader" and "journalfooter"
+	attributes and are inserted as-is.
+
+	Returns the generated markup as a string, which is empty when HTML holds no
+	journal container. When there are several containers, each one is rendered
+	on its own and the results are concatenated.
+	"""
+	Journals = []
+	for t in MkSoup(HTML).find_all(attrs={"htmljournal":True}):
+		# Not consumed yet (the style tables are still empty); unknown or missing styles fall back to Default
+		JournalStyle = JournalStyles.get(t.attrs.get("journalstyle"), JournalStyles['Default'])
+		Body, ArticleOpen = '', False
+		for c in t.children: # Entries may or may not be grouped in their own element, so headings act as separators
+			for ct in MkSoup(str(c)).find_all():
+				if ct.name in JournalHeadings:
+					# Close the previous entry only if one was really opened,
+					# so content placed before the first heading can't produce a stray </article>
+					if ArticleOpen:
+						Body += '\n</article>\n'
+					# ct.text is already entity-decoded, so it must be escaped again before going back into markup
+					Body += f'\n<article>\n<h2>{html.escape(_JournalEntryTitle(ct.text), quote=False)}</h2>\n'
+					ArticleOpen = True
+				elif ct.name == 'p': # TODO: handle other element types too, to preserve <details> and the like
+					Body += str(ct)
+		Title = t.attrs.get("journaltitle", "Untitled HTML Journal")
+		Header = t.attrs.get("journalheader", "")
+		Footer = t.attrs.get("journalfooter", "")
+		Closing = '</article>' if ArticleOpen else ''
+		# <a href=""><img width="88" height="31" src="https://journal.miso.town/static/banner-htmlj.png"></a>
+		Journals.append(f'<h1>{Title}</h1>\n{Header}\n{Body}\n{Closing}\n{Footer}\n')
+	return ''.join(Journals)
+
+def _JournalEntryTitle(Text):
+	"""Normalise heading text: drop the leading » section-link marker and unwrap a leading bracketed part.
+
+	"[2022-11-16] Title" becomes "2022-11-16 - Title". Empty text, or text whose
+	opening bracket is never closed, is returned stripped but otherwise untouched.
+	"""
+	Title = Text.strip().removeprefix('»').strip()
+	Closer = JournalTitleDecorators.get(Title[:1]) # Title[:1] is '' for an empty title, which is never a key
+	if Closer:
+		Idx = Title.find(Closer)
+		if Idx > 0: # -1 means the bracket is never closed: leave the title alone
+			Lead, Rest = Title[1:Idx].strip(), Title[Idx+1:].strip()
+			Title = f'{Lead} - {Rest}' if Rest else Lead
+	return Title
--- a/Source/Modules/Site.py
+++ b/Source/Modules/Site.py
@ -334,9 +334,10 @@ def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLP
 				HTML = ReplWithEsc(HTML, f"[staticoso:DynamicPart:{Path}]", Text)
 				HTML = ReplWithEsc(HTML, f"<staticoso:DynamicPart:{Path}>", Text)

-	for e in StaticPartsText:
-		HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])
-		HTML = ReplWithEsc(HTML, f"<staticoso:StaticPart:{e}>", StaticPartsText[e])
+	for i in range(2):
+		for e in StaticPartsText:
+			HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])
+			HTML = ReplWithEsc(HTML, f"<staticoso:StaticPart:{e}>", StaticPartsText[e])

 	if LightRun:
 		HTML = None
@ -514,7 +515,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
 		if not LightRun:
 			HTML = DoMinifyHTML(HTML, MinifyKeepComments)
 		ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments)
-	if Flags['NoScripts'] and ("<script" in ContentHTML or "<script" in HTML):
+	if Flags['NoScripts'] and ("<script" in ContentHTML.lower() or "<script" in HTML.lower()):
 		if not LightRun:
 			HTML = StripTags(HTML, ['script'])
 		ContentHTML = StripTags(ContentHTML, ['script'])
@ -526,6 +527,8 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
 		if not LightRun:
 			HTML = DoHTMLFixPre(HTML)
 		ContentHTML = DoHTMLFixPre(ContentHTML)
+	if not LightRun and 'htmljournal' in ContentHTML.lower(): # Avoid extra cycles
+		WriteFile(StripExt(PagePath)+'.journal.html', MakeHTMLJournal(ContentHTML))

 	if LightRun:
 		SlimHTML = None
--- a/1
+++ b/1
@ -1,3 +1,4 @@
+- Internal macro substitutions have to be made until there's nothing to replace
 - Release on pip
 - Alert for deprecated features
 - WriteFreely/Wordpress/Blogger integration for reposting+comments