From 7a098b55ebc90fb68aae7b78da9b28f71425b3e2 Mon Sep 17 00:00:00 2001 From: octospacc Date: Wed, 16 Nov 2022 13:07:27 +0100 Subject: [PATCH] Base work for HTML Journal standard conversion --- Source/Modules/Elements.py | 53 +++++++++++++++++++++++++++++++++++++- Source/Modules/Site.py | 11 +++++--- TODO | 1 + 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/Source/Modules/Elements.py b/Source/Modules/Elements.py index 0118575..e4da56b 100644 --- a/Source/Modules/Elements.py +++ b/Source/Modules/Elements.py @@ -10,6 +10,12 @@ from Modules.HTML import * from Modules.Utils import * +JournalHeadings = ('h2','h3','h4','h5') +JournalTitleDecorators = {'(':')', '[':']', '{':'}'} +JournalStyles = { + "Default": {}, + "details": {} +} HTMLSectionTitleLine = '» {Title}' PugSectionTitleLine = "{Start}{Heading}.SectionHeading.staticoso-SectionHeading #[span.SectionLink.staticoso-SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle.staticoso-SectionTitle {Rest}]" CategoryPageTemplate = """\ @@ -102,7 +108,7 @@ def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix='') Title = f'{Title}' if Meta['Type'] == 'Post': CreatedOn = Meta['CreatedOn'] if Meta['CreatedOn'] else '?' - Title = f"[{CreatedOn}] {Title}" + Title = f"[] {Title}" return Title def FormatTitles(Titles, Flatten=False): @@ -117,3 +123,48 @@ def FormatTitles(Titles, Flatten=False): End = '' * (n - 1) HTMLTitles += f'
  • {Start}{html.escape(Title)}{End}
  • ' return f'' + +# Clean up a generic HTML tree such that it's compliant with the HTML Journal standard +# (https://m15o.ichi.city/site/subscribing-to-a-journal-page.html); +# basis is: find an element with the htmljournal attr., and group its direct children as
    s +def MakeHTMLJournal(HTML): + Soup, Journal, Entries = MkSoup(HTML), '', [] + #for t in Soup.find_all(attrs={"journalbody":True}): + for t in Soup.find_all(attrs={"htmljournal":True}): + JournalStyle = JournalStyles[t.attrs["journalstyle"]] if 'journalstyle' in t.attrs and t.attrs["journalstyle"] in JournalStyles else JournalStyles['Default'] + #if 'journalbody' in t.attrs: # Journal container + for c in t.children: # Entries, some might be entirely grouped in their own element but others could not, use headings as separators + #print(123,str(c).strip('\n')) + for ct in MkSoup(str(c)).find_all(): + # Transform (almost, for now I reserve some) any heading into h2 and remove any attributes + if ct.name in JournalHeadings: + Title = ct.text.strip().removeprefix('»').strip() + Chr0 = Title[0] + # Remove leading symbols b + if Chr0 in JournalTitleDecorators.keys(): + Idx = Title.find(JournalTitleDecorators[Chr0]) + Title = Title[1:Idx] + ' - ' + Title[Idx+2:] + #print(Title) + if Journal: + Journal += '\n
    \n' + Journal += f'\n
    \n

    {Title}

    \n' + elif ct.name == 'p': # We should handle any type to preserve
    and things + #print(ct.name) + Journal += str(ct) + #Journal += '\n
    \n' + #t.replace_with(Journal) + #HTML = HTML.replace(str(t), Journal) # Have to do this crap, bs4's replace_with doesn't wanna work + #print(t) + #print(Journal) + t.attrs["journalheader"] if "journalheader" in t.attrs else "" + Title = t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal" + # + Journal = f'''\ +

    {t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal"}

    +{t.attrs["journalheader"] if "journalheader" in t.attrs else ""} +{Journal} + +{t.attrs["journalfooter"] if "journalfooter" in t.attrs else ""} +''' + # Instead of copying stuff from the full page, we use dedicated title, header, and footer + return Journal diff --git a/Source/Modules/Site.py b/Source/Modules/Site.py index fde691b..db6e9de 100644 --- a/Source/Modules/Site.py +++ b/Source/Modules/Site.py @@ -334,9 +334,10 @@ def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLP HTML = ReplWithEsc(HTML, f"[staticoso:DynamicPart:{Path}]", Text) HTML = ReplWithEsc(HTML, f"", Text) - for e in StaticPartsText: - HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e]) - HTML = ReplWithEsc(HTML, f"", StaticPartsText[e]) + for i in range(2): + for e in StaticPartsText: + HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e]) + HTML = ReplWithEsc(HTML, f"", StaticPartsText[e]) if LightRun: HTML = None @@ -514,7 +515,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L if not LightRun: HTML = DoMinifyHTML(HTML, MinifyKeepComments) ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments) - if Flags['NoScripts'] and ("