Complete base for HTML Journal generation

This commit is contained in:
octospacc 2022-11-17 13:03:17 +01:00
parent 7a098b55eb
commit 120d0bff11
10 changed files with 73 additions and 46 deletions

7
Assets/COPYING.md Normal file
View File

@ -0,0 +1,7 @@
- **Feed-88x31.png**:
OctoSpacc, CC BY-SA 4.0 <https://creativecommons.org/licenses/by-sa/4.0>, from <https://sitoctt.octt.eu.org>
Used to represent links to syndication feeds in the generated site.
- **Valid-HTML-Journal-88x31.png**:
m15o, Unknown License, from <https://journal.miso.town>
Used to represent conformity to the HTML Journal standard in the generated journal pages; Fair use.

BIN
Assets/Feed-88x31.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 198 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 320 B

View File

@ -6,5 +6,6 @@
"Comments": "Comments",
"OpenInNewTab": "Open in a new tab",
"ClickHere": "Click here",
"IfNotRedirected": "if you aren't automatically redirected"
"IfNotRedirected": "if you aren't automatically redirected",
"StrippedDownNotice": "This page has been automatically stripped-down from the original version, <a href=\"{Link}\">available here</a>. Consider visiting that for a better experience."
}

View File

@ -6,5 +6,6 @@
"Comments": "Commenti",
"OpenInNewTab": "Apri in una nuova scheda",
"ClickHere": "Clicca qui",
"IfNotRedirected": "se non subisci il reindirizzamento automatico"
"IfNotRedirected": "se non subisci il reindirizzamento automatico",
"StrippedDownNotice": "Questa pagina è stata automaticamente semplificata dalla versione originale, <a href=\"{Link}\">disponibile qui</a>. Considera di consultare quella per un'esperienza migliore."
}

View File

@ -40,6 +40,7 @@ Needed for Gemtext output support:
## Features roadmap
- [x] Generation of simplified pages compliant with the [HTML Journal standard](https://journal.miso.town)
- [x] HTML feeds (pages with list of N most recent posts)
- [x] Lists of all pages in a site directory
- [x] Page redirects / Alt URLs (+ ActivityPub URL overrides)
@ -61,7 +62,7 @@ Needed for Gemtext output support:
- [ ] Polished Gemtext generation
- [x] Autodetection of pages and posts
- [x] Info for posts shown on their page
- [x] HTML and CSS minification
- [x] HTML and CSS minification for Pages and Assets
- [x] Full Open Graph support
- [x] Custom categories for posts
- [x] Custom static and dynamic page parts

View File

@ -7,15 +7,16 @@
| Copyright (C) 2022, OctoSpacc |
| ================================= """
from base64 import b64encode
from Modules.HTML import *
from Modules.Utils import *
JournalHeadings = ('h2','h3','h4','h5')
JournalTitleDecorators = {'(':')', '[':']', '{':'}'}
JournalStyles = {
"Default": {},
"details": {}
}
#JournalStyles = {
# "Default": {},
# "details": {}
#}
HTMLSectionTitleLine = '<h{Index} class="SectionHeading staticoso-SectionHeading"><span class="SectionLink staticoso-SectionLink"><a href="#{DashTitle}"><span>»</span></a> </span><span class="SectionTitle staticoso-SectionTitle" id="{DashTitle}">{Title}</span></h{Index}>'
PugSectionTitleLine = "{Start}{Heading}.SectionHeading.staticoso-SectionHeading #[span.SectionLink.staticoso-SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle.staticoso-SectionTitle {Rest}]"
CategoryPageTemplate = """\
@ -127,44 +128,52 @@ def FormatTitles(Titles, Flatten=False):
# Clean up a generic HTML tree such that it's compliant with the HTML Journal standard
# (https://m15o.ichi.city/site/subscribing-to-a-journal-page.html);
# basis is: find every element with the htmljournal attribute, and group its direct children as <article>s
def MakeHTMLJournal(HTML):
def MakeHTMLJournal(Flags, Locale, FilePath, HTML):
Soup, Journal, Entries = MkSoup(HTML), '', []
#for t in Soup.find_all(attrs={"journalbody":True}):
for t in Soup.find_all(attrs={"htmljournal":True}):
JournalStyle = JournalStyles[t.attrs["journalstyle"]] if 'journalstyle' in t.attrs and t.attrs["journalstyle"] in JournalStyles else JournalStyles['Default']
#if 'journalbody' in t.attrs: # Journal container
#JournalStyle = JournalStyles[t.attrs["journalstyle"]] if 'journalstyle' in t.attrs and t.attrs["journalstyle"] in JournalStyles else JournalStyles['Default']
for c in t.children: # Entries, some might be entirely grouped in their own element but others could not, use headings as separators
#print(123,str(c).strip('\n'))
for ct in MkSoup(str(c)).find_all():
# Transform (almost, for now I reserve some) any heading into h2 and remove any attributes
if ct.name in JournalHeadings:
Title = ct.text.strip().removeprefix('»').strip()
Chr0 = Title[0]
# Remove leading symbols b
# Remove leading symbols before date
if Chr0 in JournalTitleDecorators.keys():
Idx = Title.find(JournalTitleDecorators[Chr0])
Title = Title[1:Idx] + ' - ' + Title[Idx+2:]
#print(Title)
if Journal:
Journal += '\n</article>\n'
Journal += '\n</article><br>\n'
Journal += f'\n<article>\n<h2>{Title}</h2>\n'
elif ct.name == 'p': # We should handle any type to preserve <details> and things
#print(ct.name)
Journal += str(ct)
#Journal += '\n</article>\n'
#t.replace_with(Journal)
#HTML = HTML.replace(str(t), Journal) # Have to do this crap, bs4's replace_with doesn't wanna work
#print(t)
#print(Journal)
t.attrs["journalheader"] if "journalheader" in t.attrs else ""
Title = t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal"
# <a href=""><img width="88" height="31" src="https://journal.miso.town/static/banner-htmlj.png"></a>
Journal = f'''\
<h1>{t.attrs["journaltitle"] if "journaltitle" in t.attrs else f"Untitled HTML Journal"}</h1>
{t.attrs["journalheader"] if "journalheader" in t.attrs else ""}
{Journal}
</article>
{t.attrs["journalfooter"] if "journalfooter" in t.attrs else ""}
'''
# Instead of copying stuff from the full page, we use dedicated title, header, and footer
FileName = FilePath.split('/')[-1]
URL = f'{Flags["SiteDomain"]}/{StripExt(FilePath)}.Journal.html'
# Instead of copying stuff from the full page, for now we use dedicated title, header, footer, and pagination
Title = t.attrs["journaltitle"] if 'journaltitle' in t.attrs else f'"{StripExt(FileName)}" Journal - {Flags["SiteName"]}' if Flags["SiteName"] else f'"{StripExt(FileName)}" Journal'
FeedLink = f"""<a title="Journal Atom Feed" href="https://journal.miso.town/atom?url={URL}" target="_blank" rel="noopener"><img width="88" height="31" alt="Journal Atom Feed" title="Journal Atom Feed" src="data:image/png;base64,{b64encode(ReadFile(staticosoBaseDir()+'Assets/Feed-88x31.png', 'rb')).decode()}"></a>""" if Flags["SiteDomain"] else ''
Header = t.attrs["journalheader"] if 'journalheader' in t.attrs else f"""\
<p>
<i>{Locale["StrippedDownNotice"].format(Link="./"+FileName)}</i>
<a title="Valid HTML Journal" href="https://journal.miso.town" target="_blank" rel="noopener"><img alt="Valid HTML Journal" title="Valid HTML Journal" width="88" height="31" src="data:image/png;base64,{b64encode(ReadFile(staticosoBaseDir()+'Assets/Valid-HTML-Journal-88x31.png', 'rb')).decode()}"></a>
{FeedLink}
</p>
"""
Journal = f"""\
<!DOCTYPE html>
<html>
<head>
<title>{Title}</title>
<link rel="canonical" href="{URL}">
<meta http-equiv="refresh" content="0; url='./{FileName}'">
</head>
<body>
<h1>{Title}</h1>
{Header}<br>
{Journal}
</article><br>
{t.attrs["journalfooter"] if "journalfooter" in t.attrs else ""}
</body>
</html>
"""
return Journal

View File

@ -515,7 +515,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
if not LightRun:
HTML = DoMinifyHTML(HTML, MinifyKeepComments)
ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments)
if Flags['NoScripts'] and ("<script" in ContentHTML.lower() or "<script" in HTML.lower()):
if Flags['NoScripts'] and ('<script' in ContentHTML.lower() or '<script' in HTML.lower()):
if not LightRun:
HTML = StripTags(HTML, ['script'])
ContentHTML = StripTags(ContentHTML, ['script'])
@ -528,7 +528,7 @@ def HandlePage(Flags, Page, Pages, Categories, LimitFiles, Snippets, ConfMenu, L
HTML = DoHTMLFixPre(HTML)
ContentHTML = DoHTMLFixPre(ContentHTML)
if not LightRun and 'htmljournal' in ContentHTML.lower(): # Avoid extra cycles
WriteFile(StripExt(PagePath)+'.journal.html', MakeHTMLJournal(ContentHTML))
WriteFile(StripExt(PagePath)+'.Journal.html', MakeHTMLJournal(Flags, Locale, f'{StripExt(File)}.html', ContentHTML))
if LightRun:
SlimHTML = None

View File

@ -1,3 +1,4 @@
""" ================================= |
| This file is part of |
| staticoso |
@ -12,7 +13,7 @@ import os
from datetime import datetime
from pathlib import Path
ReservedPaths = ('Site.ini', 'Assets', 'Resources', 'Pages', 'Posts', 'Templates', 'StaticParts', 'DynamicParts')
ReservedPaths = ('Site.ini', 'Assets', 'Pages', 'Posts', 'Templates', 'StaticParts', 'DynamicParts')
FileExtensions = {
'Pages': ('htm', 'html', 'markdown', 'md', 'pug', 'txt'),
'HTML': ('.htm', '.html'),
@ -20,23 +21,26 @@ FileExtensions = {
'Tmp': ('htm', 'markdown', 'md', 'pug', 'txt')}
def SureList(e):
return e if type(e) == list else [e]
return e if type(e) == list else [e]
def ReadFile(p):
# Get the base directory path of the staticoso program.
# The result is the absolute directory of this module file with '/../../' appended,
# so it always ends with a slash and the '..' components are left unresolved.
def staticosoBaseDir():
return f"{os.path.dirname(os.path.abspath(__file__))}/../../"
def ReadFile(p, m='r'):
try:
with open(p, 'r') as f:
with open(p, m) as f:
return f.read()
except Exception:
print(f"[E] Error reading file {p}")
logging.error(f"Error reading file {p}")
return None
def WriteFile(p, c):
def WriteFile(p, c, m='w'):
try:
with open(p, 'w') as f:
f.write(c)
return True
with open(p, m) as f:
return f.write(c)
except Exception:
print(f"[E] Error writing file {p}")
logging.error(f"[E] Error writing file {p}")
return False
def FileToStr(File, Truncate=''):
@ -153,7 +157,7 @@ def GetFullDate(Date):
def LoadLocale(Lang):
Lang = Lang + '.json'
Folder = os.path.dirname(os.path.abspath(__file__)) + '/../../Locale/'
Folder = f'{staticosoBaseDir()}Locale/'
File = ReadFile(Folder + Lang)
if File:
return json.loads(File)

6
TODO
View File

@ -1,3 +1,7 @@
- Pages transclusion + probably drop StaticParts (would be redundant)
- User macros with arguments
- Specifying language for single pages, with the option applying to the locale used for templating
- Apply HTML templating to Journal pages (requires a template that won't conflict, aka testing is needed)
- Internal macro substitutions have to be made until there's nothing to replace
- Release on pip
- Alert for deprecated features
@ -20,7 +24,7 @@
- Show page size/words/time in meta line
- Add feed support for diary-like pages
- Fix excess whitespace in some section/menu titles
- Change all staticoso service tag enclosures from [] to <>
- Support both [] and <> as internal macro enclosures with feature parity, + streamline the code for that
- Investigate a strange bug with Macros
- Handle file extensions in any letter case, not just lowercase; currently the bulk of the issue is finding the files on disk
- Test sorting by date for files not starting with date, and dated folders