mirror of https://gitlab.com/octtspacc/staticoso
Base work for HTML Journal standard conversion
This commit is contained in:
parent
84eb64ff67
commit
7a098b55eb
|
@ -10,6 +10,12 @@
|
|||
from Modules.HTML import *
|
||||
from Modules.Utils import *
|
||||
|
||||
# Tag names that MakeHTMLJournal treats as journal entry separators;
# each matching heading is re-emitted as an <h2> that opens a new <article>.
JournalHeadings = ('h2','h3','h4','h5')
|
||||
# Opening -> closing bracket pairs. When a heading's text starts with one of the
# opening characters, MakeHTMLJournal strips the pair and joins the bracketed
# part to the rest of the title with ' - '.
JournalTitleDecorators = {'(':')', '[':']', '{':'}'}
|
||||
# Named journal styles, selected through an element's `journalstyle` attribute
# (falling back to "Default" when the attribute is missing or unknown).
# NOTE(review): both styles are empty dicts here; the selected style is looked up
# but not otherwise used by the code visible in this commit.
JournalStyles = {
|
||||
"Default": {},
|
||||
"details": {}
|
||||
}
|
||||
HTMLSectionTitleLine = '<h{Index} class="SectionHeading staticoso-SectionHeading"><span class="SectionLink staticoso-SectionLink"><a href="#{DashTitle}"><span>»</span></a> </span><span class="SectionTitle staticoso-SectionTitle" id="{DashTitle}">{Title}</span></h{Index}>'
|
||||
PugSectionTitleLine = "{Start}{Heading}.SectionHeading.staticoso-SectionHeading #[span.SectionLink.staticoso-SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle.staticoso-SectionTitle {Rest}]"
|
||||
CategoryPageTemplate = """\
|
||||
|
@ -102,7 +108,7 @@ def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix='')
|
|||
Title = f'<a href="{Href}">{Title}</a>'
|
||||
if Meta['Type'] == 'Post':
|
||||
CreatedOn = Meta['CreatedOn'] if Meta['CreatedOn'] else '?'
|
||||
Title = f"[{CreatedOn}] {Title}"
|
||||
Title = f"[<time>{CreatedOn}</time>] {Title}"
|
||||
return Title
|
||||
|
||||
def FormatTitles(Titles, Flatten=False):
|
||||
|
@ -117,3 +123,48 @@ def FormatTitles(Titles, Flatten=False):
|
|||
End = '</li></ul>' * (n - 1)
|
||||
HTMLTitles += f'<li>{Start}<a href="#{DashyTitle}">{html.escape(Title)}</a>{End}</li>'
|
||||
return f'<ul>{HTMLTitles}</ul>'
|
||||
|
||||
# Turn the text of a heading into a journal entry title.
def _JournalEntryTitle(RawTitle):
    """Normalize heading text into an entry title.

    Strips surrounding whitespace and a leading '»' (the section-link marker),
    then, if the title starts with an opening character listed in
    JournalTitleDecorators, removes the bracket pair and joins the bracketed
    part to the remainder with ' - ' (e.g. '[2023-01-01] Hello' becomes
    '2023-01-01 - Hello').

    Returns the title unchanged (apart from stripping) when it is empty or when
    the opening bracket has no matching closing bracket.
    """
    Title = RawTitle.strip().removeprefix('»').strip()
    if not Title:
        return Title
    Closer = JournalTitleDecorators.get(Title[0])
    if Closer is None:
        return Title
    # Start searching after the opening character; -1 means the decorator is unbalanced
    Idx = Title.find(Closer, 1)
    if Idx == -1:
        return Title
    # Strip instead of assuming exactly one space follows the closing bracket
    Lead, Rest = Title[1:Idx].strip(), Title[Idx+1:].strip()
    return f'{Lead} - {Rest}' if Rest else Lead

# Clean up a generic HTML tree such that it's compliant with the HTML Journal standard
# (https://m15o.ichi.city/site/subscribing-to-a-journal-page.html).
# Basis: find the elements carrying the `htmljournal` attribute and regroup the content
# of their direct children into <article>s, using headings as entry separators.
def MakeHTMLJournal(HTML):
    """Build the HTML Journal document for a page.

    HTML: markup of the page content, parsed with MkSoup.

    Returns the journal markup as a string: an <h1> title, an optional header,
    one <article> per heading found (each starting with an <h2>), and an
    optional footer. Title, header and footer come from the `journaltitle`,
    `journalheader` and `journalfooter` attributes of the journal container
    rather than from the surrounding page. Returns '' when no element carries
    the `htmljournal` attribute.

    NOTE(review): when several containers are present their entries are merged
    and the attributes of the last one are used; confirm this is the intended
    behaviour for multi-container pages.
    """
    Containers = MkSoup(HTML).find_all(attrs={"htmljournal": True})
    if not Containers:
        return ''

    Parts, ArticleOpen = [], False
    for Container in Containers:
        # TODO: the `journalstyle` attribute (see JournalStyles) is not applied yet
        # Entries: some might be entirely grouped in their own element but others
        # could not, so headings are used as separators
        for Child in Container.children:
            # Re-parsing the child makes find_all() also yield the child itself
            for Tag in MkSoup(str(Child)).find_all():
                if Tag.name in JournalHeadings:
                    # Transform (almost, for now some are reserved) any heading
                    # into a bare <h2>, dropping its attributes
                    Title = _JournalEntryTitle(Tag.text)
                    if not Title:
                        continue # An empty heading can't name an entry
                    if ArticleOpen:
                        Parts.append('\n</article>\n')
                    # Tag.text is decoded text, so it must be escaped again
                    Parts.append(f'\n<article>\n<h2>{html.escape(Title, quote=False)}</h2>\n')
                    ArticleOpen = True
                elif Tag.name == 'p': # We should handle any type to preserve <details> and things
                    Parts.append(str(Tag))

    # Instead of copying stuff from the full page, we use dedicated title, header, and footer
    Attrs = Containers[-1].attrs
    JournalTitle = Attrs.get("journaltitle", "Untitled HTML Journal")
    Header = Attrs.get("journalheader", "")
    Footer = Attrs.get("journalfooter", "")
    Body = ''.join(Parts)
    # Only close an <article> if one was actually opened
    Closing = '</article>\n' if ArticleOpen else ''
    # <a href=""><img width="88" height="31" src="https://journal.miso.town/static/banner-htmlj.png"></a>
    return f'<h1>{JournalTitle}</h1>\n{Header}\n{Body}\n{Closing}{Footer}\n'
|
||||
|
|
|
@ -334,9 +334,10 @@ def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLP
|
|||
HTML = ReplWithEsc(HTML, f"[staticoso:DynamicPart:{Path}]", Text)
|
||||
HTML = ReplWithEsc(HTML, f"<staticoso:DynamicPart:{Path}>", Text)
|
||||
|
||||
for e in StaticPartsText:
|
||||
HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])
|
||||
HTML = ReplWithEsc(HTML, f"<staticoso:StaticPart:{e}>", StaticPartsText[e])
|
||||
for i in range(2):
|
||||
for e in StaticPartsText:
|
||||
HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])
|
||||
HTML = ReplWithEsc(HTML, f"<staticoso:StaticPart:{e}>", StaticPartsText[e])
|
||||
|
||||
if LightRun:
|
||||
HTML = None
|
||||
|
@ -514,7 +515,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
|
|||
if not LightRun:
|
||||
HTML = DoMinifyHTML(HTML, MinifyKeepComments)
|
||||
ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments)
|
||||
if Flags['NoScripts'] and ("<script" in ContentHTML or "<script" in HTML):
|
||||
if Flags['NoScripts'] and ("<script" in ContentHTML.lower() or "<script" in HTML.lower()):
|
||||
if not LightRun:
|
||||
HTML = StripTags(HTML, ['script'])
|
||||
ContentHTML = StripTags(ContentHTML, ['script'])
|
||||
|
@ -526,6 +527,8 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
|
|||
if not LightRun:
|
||||
HTML = DoHTMLFixPre(HTML)
|
||||
ContentHTML = DoHTMLFixPre(ContentHTML)
|
||||
if not LightRun and 'htmljournal' in ContentHTML.lower(): # Avoid extra cycles
|
||||
WriteFile(StripExt(PagePath)+'.journal.html', MakeHTMLJournal(ContentHTML))
|
||||
|
||||
if LightRun:
|
||||
SlimHTML = None
|
||||
|
|
Loading…
Reference in New Issue