Base work for HTML Journal standard conversion

This commit is contained in:
octospacc 2022-11-16 13:07:27 +01:00
parent 84eb64ff67
commit 7a098b55eb
3 changed files with 60 additions and 5 deletions

View File

@ -10,6 +10,12 @@
from Modules.HTML import *
from Modules.Utils import *
# Heading tags that MakeHTMLJournal treats as the start of a new journal entry.
JournalHeadings = ('h2','h3','h4','h5')
# Opening bracket -> matching closing bracket. A heading title that starts with
# one of these openers gets its bracketed prefix unwrapped by MakeHTMLJournal.
JournalTitleDecorators = {'(':')', '[':']', '{':'}'}
# Journal style names mapped to their options (every style is empty so far).
JournalStyles = {
"Default": {},
"details": {}
}
# HTML template for a section heading with its anchor link.
# Placeholders: {Index} (digit after "h" in the tag name), {DashTitle} (used both as link target and element id), {Title}.
HTMLSectionTitleLine = '<h{Index} class="SectionHeading staticoso-SectionHeading"><span class="SectionLink staticoso-SectionLink"><a href="#{DashTitle}"><span>»</span></a> </span><span class="SectionTitle staticoso-SectionTitle" id="{DashTitle}">{Title}</span></h{Index}>'
# Pug-syntax template carrying the same classes as the HTML one.
# Placeholders: {Start}, {Heading}, {DashTitle}, {Rest}.
PugSectionTitleLine = "{Start}{Heading}.SectionHeading.staticoso-SectionHeading #[span.SectionLink.staticoso-SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle.staticoso-SectionTitle {Rest}]"
CategoryPageTemplate = """\
@ -102,7 +108,7 @@ def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix='')
Title = f'<a href="{Href}">{Title}</a>'
if Meta['Type'] == 'Post':
CreatedOn = Meta['CreatedOn'] if Meta['CreatedOn'] else '?'
Title = f"[{CreatedOn}] {Title}"
Title = f"[<time>{CreatedOn}</time>] {Title}"
return Title
def FormatTitles(Titles, Flatten=False):
@ -117,3 +123,48 @@ def FormatTitles(Titles, Flatten=False):
End = '</li></ul>' * (n - 1)
HTMLTitles += f'<li>{Start}<a href="#{DashyTitle}">{html.escape(Title)}</a>{End}</li>'
return f'<ul>{HTMLTitles}</ul>'
# Clean up a generic HTML tree so that it complies with the HTML Journal standard
# (https://m15o.ichi.city/site/subscribing-to-a-journal-page.html).
# The basic idea: find each element carrying the htmljournal attribute and group its content into <article>s, using headings as separators
def _CleanJournalTitle(Title):
    """Return heading text as a plain journal entry title.

    Drops a leading "»" section-link marker, then rewrites a title that opens
    with a bracketed prefix: "[2022-11-16] Hello" -> "2022-11-16 - Hello".
    Titles without a complete bracket pair are returned unchanged.
    """
    Title = Title.strip().removeprefix('»').strip()
    # Title[:1] is '' for an empty heading, so this lookup can never raise
    Closer = JournalTitleDecorators.get(Title[:1])
    if Closer is None:
        return Title
    Idx = Title.find(Closer)
    if Idx == -1: # Opening bracket without its closer: leave the title alone
        return Title
    # lstrip() instead of skipping one fixed character, so "[date]Text" keeps its "T"
    Inner, Rest = Title[1:Idx], Title[Idx+1:].lstrip()
    return f'{Inner} - {Rest}' if Rest else Inner

def _CollectJournalEntries(Container):
    """Turn the content of one journal container into <article> markup.

    Returns a tuple (Markup, ArticleOpen); ArticleOpen tells the caller whether
    the last <article> still needs its closing tag.
    """
    Parts, ArticleOpen = [], False
    # Entries may or may not be grouped in an element of their own, so headings are used as separators
    for Child in Container.children:
        # Children can be bare text nodes; re-parsing their markup gives a uniform way to walk the tags
        for Tag in MkSoup(str(Child)).find_all():
            if Tag.name in JournalHeadings:
                # Every accepted heading becomes an attribute-less <h2> opening a new entry
                if ArticleOpen:
                    Parts.append('\n</article>\n')
                # Tag.text is already unescaped, so it must be escaped again before going back into markup
                EntryTitle = html.escape(_CleanJournalTitle(Tag.text), quote=False)
                Parts.append(f'\n<article>\n<h2>{EntryTitle}</h2>\n')
                ArticleOpen = True
            elif Tag.name == 'p': # TODO: handle other element types too, to preserve <details> and the like
                Parts.append(str(Tag))
    return ''.join(Parts), ArticleOpen

def MakeHTMLJournal(HTML):
    """Build an HTML Journal document out of a generic HTML page.

    Every element carrying the htmljournal attribute is a journal container.
    Its headings (see JournalHeadings) start new <article> entries and its <p>
    elements are copied into the output. Instead of copying parts of the full
    page, the dedicated journaltitle, journalheader and journalfooter
    attributes of the container supply title, header and footer; header and
    footer are inserted verbatim since they are expected to hold markup.

    HTML: the page markup, as a string.
    Returns the journal markup as a string; it is empty when no element has
    the htmljournal attribute. Each container is rendered on its own, so a
    later container never gets wrapped around an earlier one's finished output.
    """
    # The journalstyle attribute / JournalStyles table is not consulted yet: every style is still empty
    Documents = []
    for Container in MkSoup(HTML).find_all(attrs={"htmljournal":True}):
        Body, ArticleOpen = _CollectJournalEntries(Container)
        Attrs = Container.attrs
        Lines = [
            f'<h1>{Attrs.get("journaltitle", "Untitled HTML Journal")}</h1>',
            Attrs.get("journalheader", ""),
            Body,
        ]
        if ArticleOpen: # Only close an <article> that was actually opened
            Lines.append('</article>')
        Lines.append(Attrs.get("journalfooter", ""))
        Documents.append('\n'.join(Lines) + '\n')
    return ''.join(Documents)

View File

@ -334,9 +334,10 @@ def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLP
HTML = ReplWithEsc(HTML, f"[staticoso:DynamicPart:{Path}]", Text)
HTML = ReplWithEsc(HTML, f"<staticoso:DynamicPart:{Path}>", Text)
for e in StaticPartsText:
HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])
HTML = ReplWithEsc(HTML, f"<staticoso:StaticPart:{e}>", StaticPartsText[e])
for i in range(2):
for e in StaticPartsText:
HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])
HTML = ReplWithEsc(HTML, f"<staticoso:StaticPart:{e}>", StaticPartsText[e])
if LightRun:
HTML = None
@ -514,7 +515,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
if not LightRun:
HTML = DoMinifyHTML(HTML, MinifyKeepComments)
ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments)
if Flags['NoScripts'] and ("<script" in ContentHTML or "<script" in HTML):
if Flags['NoScripts'] and ("<script" in ContentHTML.lower() or "<script" in HTML.lower()):
if not LightRun:
HTML = StripTags(HTML, ['script'])
ContentHTML = StripTags(ContentHTML, ['script'])
@ -526,6 +527,8 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
if not LightRun:
HTML = DoHTMLFixPre(HTML)
ContentHTML = DoHTMLFixPre(ContentHTML)
if not LightRun and 'htmljournal' in ContentHTML.lower(): # Avoid extra cycles
WriteFile(StripExt(PagePath)+'.journal.html', MakeHTMLJournal(ContentHTML))
if LightRun:
SlimHTML = None

1
TODO
View File

@ -1,3 +1,4 @@
- Internal macro substitutions have to be made until there's nothing to replace
- Release on pip
- Alert for deprecated features
- WriteFreely/Wordpress/Blogger integration for reposting+comments