From 120d0bff1158c1821bfe2b06ae1dcca7e1cce9cf Mon Sep 17 00:00:00 2001 From: octospacc Date: Thu, 17 Nov 2022 13:03:17 +0100 Subject: [PATCH] Complete base for HTML Journal generation --- Assets/COPYING.md | 7 +++ Assets/Feed-88x31.png | Bin 0 -> 198 bytes Assets/Valid-HTML-Journal-88x31.png | Bin 0 -> 320 bytes Locale/en.json | 3 +- Locale/it.json | 3 +- README.md | 3 +- Source/Modules/Elements.py | 67 ++++++++++++++++------------ Source/Modules/Site.py | 4 +- Source/Modules/Utils.py | 26 ++++++----- TODO | 6 ++- 10 files changed, 73 insertions(+), 46 deletions(-) create mode 100644 Assets/COPYING.md create mode 100644 Assets/Feed-88x31.png create mode 100644 Assets/Valid-HTML-Journal-88x31.png diff --git a/Assets/COPYING.md b/Assets/COPYING.md new file mode 100644 index 0000000..ad26e3b --- /dev/null +++ b/Assets/COPYING.md @@ -0,0 +1,7 @@ +- **Feed-88x31.png**: +OctoSpacc, CC BY-SA 4.0 , from +Used to represent links to syndication feeds in the generated site. + +- **Valid-HTML-Journal-88x31.png**: +m15o, Unknown License, from +Used to represent conformity to the HTML Journal standard in the generated journal pages; Fair use. diff --git a/Assets/Feed-88x31.png b/Assets/Feed-88x31.png new file mode 100644 index 0000000000000000000000000000000000000000..3a62feeed120ec4f521f1d81da58be575b79bc08 GIT binary patch literal 198 zcmeAS@N?(olHy`uVBq!ia0vp^5kM@@#0(_mmOUy4Qak}ZA+G=H8O}IJ?KhVC|NsAs zw~t={#Y;V1978mMk6t>>*FVdQ&MBb@07EKB!2kdN literal 0 HcmV?d00001 diff --git a/Assets/Valid-HTML-Journal-88x31.png b/Assets/Valid-HTML-Journal-88x31.png new file mode 100644 index 0000000000000000000000000000000000000000..1aeafeafb9be3728890068c83937ef61f933f9d8 GIT binary patch literal 320 zcmeAS@N?(olHy`uVBq!ia0vp^5kM@@!VDxQJ`=wTq*&4&eH|GXHuiJ>Nn{1`8H9bc7xmpwiSRYhY zG!}kjI6cqd1B=sl`^)EiqHa9PvTtHjZa#lKMX**+Wbex854n5gS|;;M59%^1ym(YG z&V5QDci+k0+Y^4ZZ~p#J<>pZ__Q{s<2cMrjzBYJ|M^Qx8jd=^c%$e%lI@4SuOLKDNFH>gIjWCiw{=K zNwi7`))p%{;&60YMwge};=e94rPkzr-&mS>available here. Consider visiting that for a better experience." } diff --git a/Locale/it.json b/Locale/it.json index ef21767..dc859ec 100644 --- a/Locale/it.json +++ b/Locale/it.json @@ -6,5 +6,6 @@ "Comments": "Commenti", "OpenInNewTab": "Apri in una nuova scheda", "ClickHere": "Clicca qui", - "IfNotRedirected": "se non subisci il reindirizzamento automatico" + "IfNotRedirected": "se non subisci il reindirizzamento automatico", + "StrippedDownNotice": "Questa pagina è stata automaticamente semplificata dalla versione originale, disponibile qui. Considera di consultare quella per un'esperienza migliore." } diff --git a/README.md b/README.md index 4cf52b1..b6e038a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Needed for Gemtext output support: ## Features roadmap +- [x] Generation of simplified pages compliant with the [HTML Journal standard](https://journal.miso.town) - [x] HTML feeds (pages with list of N most recent posts) - [x] Lists of all pages in a site directory - [x] Page redirects / Alt URLs (+ ActivityPub URL overrides) @@ -61,7 +62,7 @@ Needed for Gemtext output support: - [ ] Polished Gemtext generation - [x] Autodetection of pages and posts - [x] Info for posts shown on their page -- [x] HTML and CSS minification +- [x] HTML and CSS minification for Pages and Assets - [x] Full Open Graph support - [x] Custom categories for posts - [x] Custom static and dynamic page parts diff --git a/Source/Modules/Elements.py b/Source/Modules/Elements.py index e4da56b..ec6cbe1 100644 --- a/Source/Modules/Elements.py +++ b/Source/Modules/Elements.py @@ -7,15 +7,16 @@ | Copyright (C) 2022, OctoSpacc | | ================================= """ +from base64 import b64encode from Modules.HTML import * from Modules.Utils import * JournalHeadings = ('h2','h3','h4','h5') JournalTitleDecorators = {'(':')', '[':']', '{':'}'} -JournalStyles = { - "Default": {}, - "details": {} -} +#JournalStyles = { +# "Default": {}, +# "details": {} +#} HTMLSectionTitleLine = '» {Title}' PugSectionTitleLine = "{Start}{Heading}.SectionHeading.staticoso-SectionHeading #[span.SectionLink.staticoso-SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle.staticoso-SectionTitle {Rest}]" CategoryPageTemplate = """\ @@ -127,44 +128,52 @@ def FormatTitles(Titles, Flatten=False): # Clean up a generic HTML tree such that it's compliant with the HTML Journal standard # (https://m15o.ichi.city/site/subscribing-to-a-journal-page.html); # basis is: find an element with the JournalBody attr., and group its direct children as
s -def MakeHTMLJournal(HTML): +def MakeHTMLJournal(Flags, Locale, FilePath, HTML): Soup, Journal, Entries = MkSoup(HTML), '', [] - #for t in Soup.find_all(attrs={"journalbody":True}): for t in Soup.find_all(attrs={"htmljournal":True}): - JournalStyle = JournalStyles[t.attrs["journalstyle"]] if 'journalstyle' in t.attrs and t.attrs["journalstyle"] in JournalStyles else JournalStyles['Default'] - #if 'journalbody' in t.attrs: # Journal container + #JournalStyle = JournalStyles[t.attrs["journalstyle"]] if 'journalstyle' in t.attrs and t.attrs["journalstyle"] in JournalStyles else JournalStyles['Default'] for c in t.children: # Entries, some might be entirely grouped in their own element but others could not, use headings as separators - #print(123,str(c).strip('\n')) for ct in MkSoup(str(c)).find_all(): # Transform (almost, for now I reserve some) any heading into h2 and remove any attributes if ct.name in JournalHeadings: Title = ct.text.strip().removeprefix('»').strip() Chr0 = Title[0] - # Remove leading symbols b + # Remove leading symbols before date if Chr0 in JournalTitleDecorators.keys(): Idx = Title.find(JournalTitleDecorators[Chr0]) Title = Title[1:Idx] + ' - ' + Title[Idx+2:] - #print(Title) if Journal: - Journal += '\n
\n' + Journal += '\n
\n' Journal += f'\n
\n

{Title}

\n' elif ct.name == 'p': # We should handle any type to preserve
and things - #print(ct.name) Journal += str(ct) - #Journal += '\n
\n' - #t.replace_with(Journal) - #HTML = HTML.replace(str(t), Journal) # Have to do this crap, bs4's replace_with doesn't wanna work - #print(t) - #print(Journal) - t.attrs["journalheader"] if "journalheader" in t.attrs else "" - Title = t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal" - # - Journal = f'''\ -

{t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal"}

-{t.attrs["journalheader"] if "journalheader" in t.attrs else ""} -{Journal} - -{t.attrs["journalfooter"] if "journalfooter" in t.attrs else ""} -''' - # Instead of copying stuff from the full page, we use dedicated title, header, and footer + FileName = FilePath.split('/')[-1] + URL = f'{Flags["SiteDomain"]}/{StripExt(FilePath)}.Journal.html' + # Instead of copying stuff from the full page, for now we use dedicated title, header, footer, and pagination + Title = t.attrs["journaltitle"] if 'journaltitle' in t.attrs else f'"{StripExt(FileName)}" Journal - {Flags["SiteName"]}' if Flags["SiteName"] else f'"{StripExt(FileName)}" Journal' + FeedLink = f"""Journal Atom Feed""" if Flags["SiteDomain"] else '' + Header = t.attrs["journalheader"] if 'journalheader' in t.attrs else f"""\ +

+{Locale["StrippedDownNotice"].format(Link="./"+FileName)} +Valid HTML Journal +{FeedLink} +

+""" + Journal = f"""\ + + + + {Title} + + + + +

{Title}

+ {Header}
+ {Journal} +
+ {t.attrs["journalfooter"] if "journalfooter" in t.attrs else ""} + + +""" return Journal diff --git a/Source/Modules/Site.py b/Source/Modules/Site.py index db6e9de..b56c81a 100644 --- a/Source/Modules/Site.py +++ b/Source/Modules/Site.py @@ -515,7 +515,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L if not LightRun: HTML = DoMinifyHTML(HTML, MinifyKeepComments) ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments) - if Flags['NoScripts'] and (" +- Parity presence for [] and <> internal macro enclosure, + streamline the code for that - Investigate a strange bug with Macros - Handle file extensions with any case sensitivity, not just lowercase; currently the bulk of the issue is finding the files on disk - Test sorting by date for files not starting with date, and dated folders