Filename as title for untitled pages, title attr. addition, add TXT input pages + file formats fixes

This commit is contained in:
2022-08-14 17:35:58 +02:00
parent 1faf0014be
commit be37e2d845
7 changed files with 69 additions and 40 deletions

View File

@ -7,7 +7,12 @@ I'm making this because I need a simple and kind-of-minimal program to serve me
This won't replace any of the big projects out there that do the same thing but, as with all of my projects, I'm releasing it as free libre software, in the hope that someone will find it useful.
Also, this software is needed for someone to edit and compile my personal sub-website [sitoctt](https://gitlab.com/octtspacc/sitoctt) from its source. Since that site is also released under a libre license that allows modifications, I also have to release the tools I use to build it.
Feel free to experiment with all of this stuff!
Everything is still a heavy WIP, and features might break across commits, but feel free to experiment with all of this stuff!
## Documentation
Documentation can be found at [staticoso-docs.gitlab.io](https://staticoso-docs.gitlab.io).
Obviously, it's built with staticoso itself 😁️. Its source repo can be found at [gitlab.com/octtspacc/staticoso-docs](https://gitlab.com/octtspacc/staticoso-docs).
## Dependencies
- [Python == 3.10.4](https://python.org)
@ -22,6 +27,10 @@ Feel free to experiment with all of this stuff!
- [html2gmi](https://github.com/LukeEmmet/html2gmi)
## Features roadmap
- [ ] Configuration with both INI files and CLI arguments
- [ ] Category-based feeds
- [ ] Support for multi-language sites
- [x] The `title` attribute added to images which only have `alt` (for desktop accessibility)
- [x] Local (per-page) and global (per-site) macros
- [x] ActivityPub (Mastodon) support (Feed + embedded comments)
- [ ] Polished Gemtext generation
@ -44,5 +53,5 @@ Feel free to experiment with all of this stuff!
- [x] Generation of titles in right sidebar with clickable links
- [x] Detection of titles in a page
- [x] Custom static page parts by template
- [x] _HTML_, _Extended Markdown_, and _Pug_ supported for input page files
- [x] _HTML_, _TXT_, _Extended Markdown_, and _Pug_ supported for input page files
- [x] Ready for use

View File

@ -63,7 +63,7 @@ def GetConfMenu(Entries, MarkdownExts):
for i in Entries:
e = Entries[i]
if not ((e.startswith('<') or e.startswith('[') or e.startswith('- ')) and (e.endswith('>') or e.endswith(')') or e.endswith('}'))):
if not e.lower().endswith('.html'):
if not (e.lower().endswith('.html') or e.lower().endswith('.htm')):
e += '.html'
Menu[int(i)] = e
return Menu
@ -116,10 +116,10 @@ def Main(Args, FeedEntries):
print("[I] Generating HTML")
Pages = MakeSite(
TemplatesText=LoadFromDir('Templates', '*.html'),
StaticPartsText=LoadFromDir('StaticParts', '*.html'),
TemplatesText=LoadFromDir('Templates', ['*.htm', '*.html']),
StaticPartsText=LoadFromDir('StaticParts', ['*.htm', '*.html']),
DynamicParts=literal_eval(Args.DynamicParts) if Args.DynamicParts else {},
DynamicPartsText=LoadFromDir('DynamicParts', '*.html'),
DynamicPartsText=LoadFromDir('DynamicParts', ['*.htm', '*.html']),
ConfMenu=ConfMenu,
GlobalMacros=ReadConf(SiteConf, 'Macros'),
SiteName=SiteName,
@ -190,8 +190,10 @@ def Main(Args, FeedEntries):
Pages,
Header=Args.GemtextHeader if Args.GemtextHeader else f"# {SiteName}\n\n" if SiteName else '')
print("[I] Last Steps")
print("[I] Cleaning Temporary Files")
DelTmp()
print("[I] Copying Assets")
os.system("cp -R Assets/* public/")
print("[I] Done!")

View File

@ -32,7 +32,7 @@ def StripAttrs(HTML):
t.attrs = {}
return str(Soup)
def StripTags(HTML, ToStrip):
def StripTags(HTML, ToStrip): # Remove desired tags from the HTML
Soup = MkSoup(HTML)
Tags = Soup.find_all()
for t in Tags:
@ -40,6 +40,14 @@ def StripTags(HTML, ToStrip):
t.replace_with('')
return str(Soup)
def ImgAltToTitle(HTML): # Gives every <img> that has alt text but no title a title attribute equal to its alt
    Document = MkSoup(HTML)
    for Image in Document.find_all('img'):
        Attrs = Image.attrs
        # Leave alone images that already carry a title, or that have no alt to copy from
        if 'title' in Attrs or 'alt' not in Attrs:
            continue
        Attrs['title'] = Attrs['alt']
    # Serialize the (possibly modified) parsed document back to a string
    return str(Document)
def AddToTagStartEnd(HTML, MatchStart, MatchEnd, AddStart, AddEnd): # This doesn't handle nested tags
StartPos = None
for i,e in enumerate(HTML):

View File

@ -16,7 +16,7 @@ def PugCompileList(Pages):
# Pug-cli seems to shit itself with folder paths as input, so we pass ALL the files as arguments
Paths = ''
for File, Content, Titles, Meta in Pages:
if File.endswith('.pug'):
if File.lower().endswith('.pug'):
Path = 'public/{}'.format(File)
WriteFile(Path, Content)
Paths += '"{}" '.format(Path)

View File

@ -30,13 +30,13 @@ def MakeLinkableTitle(Line, Title, DashTitle, Type):
NewLine += Line[Index+2:]
return NewLine
def GetTitle(Meta, Titles, Prefer='MetaTitle', BlogName=None):
def GetTitle(FileName, Meta, Titles, Prefer='MetaTitle', BlogName=None):
if Prefer == 'BodyTitle':
Title = Titles[0].lstrip('#') if Titles else Meta['Title'] if Meta['Title'] else 'Untitled'
Title = Titles[0].lstrip('#') if Titles else Meta['Title'] if Meta['Title'] else FileName
elif Prefer == 'MetaTitle':
Title = Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else 'Untitled'
Title = Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else FileName
elif Prefer == 'HTMLTitle':
Title = Meta['HTMLTitle'] if Meta['HTMLTitle'] else Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else 'Untitled'
Title = Meta['HTMLTitle'] if Meta['HTMLTitle'] else Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else FileName
if BlogName and 'Blog' in Meta['Categories']:
Title += ' - ' + BlogName
return Title
@ -140,6 +140,7 @@ def TemplatePreprocessor(Text):
def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
File = ReadFile(Path)
Path = Path.lower()
Content, Titles, DashyTitles, HTMLTitlesFound, Macros, Meta, MetaDefault = '', [], [], False, '', '', {
'Template': SiteTemplate,
'Style': '',
@ -165,7 +166,7 @@ def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
Macros += lll[1:].lstrip() + '\n'
else:
Headings = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
if Path.endswith('.html') and not HTMLTitlesFound:
if Path.endswith(FileExtensions['HTML']) and not HTMLTitlesFound:
Soup = BeautifulSoup(File, 'html.parser')
Tags = Soup.find_all()
for t in Tags:
@ -177,7 +178,7 @@ def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
t.replace_with(MakeLinkableTitle(None, Title, DashTitle, 'md'))
Content = str(Soup.prettify(formatter=None))
HTMLTitlesFound = True
elif Path.endswith('.md'):
elif Path.endswith(FileExtensions['Markdown']):
if ll.startswith('#'):
DashTitle = DashifyTitle(l.lstrip('#'), DashyTitles)
DashyTitles += [DashTitle]
@ -201,6 +202,8 @@ def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
Content += MakeLinkableTitle(l, Title, DashTitle, 'pug') + '\n'
else:
Content += l + '\n'
elif Path.endswith('.txt'):
Content += l + '\n'
Meta = dict(ReadConf(LoadConfStr('[Meta]\n' + Meta), 'Meta'))
for i in MetaDefault:
if i in Meta:
@ -229,7 +232,7 @@ def PagePostprocessor(FileType, Text, Meta):
return Text
def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix=''):
Title = GetTitle(Meta, Titles, Prefer, BlogName)
Title = GetTitle(File.split('/')[-1], Meta, Titles, Prefer, BlogName)
Link = False if Meta['Index'] == 'Unlinked' else True
if Link:
Title = '[{}]({})'.format(
@ -282,6 +285,7 @@ def CanIndex(Index, For):
def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale):
HTMLTitles = FormatTitles(Titles)
BodyDescription, BodyImage = '', ''
if not File.lower().endswith('.txt'):
Soup = BeautifulSoup(Content, 'html.parser')
if not BodyDescription and Soup.p:
@ -292,7 +296,7 @@ def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLP
#Content = SquareFnrefs(Content)
Content = AddToTagStartEnd(Content, '<a class="footnote-ref"', '</a>', '[', ']')
Title = GetTitle(Meta, Titles, 'MetaTitle', BlogName)
Title = GetTitle(File.split('/')[-1], Meta, Titles, 'MetaTitle', BlogName)
Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
Image = GetImage(Meta, BodyImage, 'MetaImage')
@ -439,10 +443,14 @@ def MakeSite(TemplatesText, StaticPartsText, DynamicParts, DynamicPartsText, Con
print("[I] Writing Pages")
for File, Content, Titles, Meta in Pages:
PagePath = 'public/{}.html'.format(StripExt(File))
if File.endswith('.md'):
if File.lower().endswith(('.markdown', '.md')):
Content = markdown(PagePostprocessor('md', Content, Meta), extensions=MarkdownExts)
elif File.endswith(('.pug')):
elif File.lower().endswith(('.pug')):
Content = PagePostprocessor('pug', ReadFile(PagePath), Meta)
elif File.lower().endswith(('.txt')):
Content = '<pre>' + Content + '</pre>'
elif File.lower().endswith(('.htm', '.html')):
Content = ReadFile(PagePath)
TemplateMeta = TemplatePreprocessor(TemplatesText[Meta['Template']])
HTMLPagesList = GetHTMLPagesList(
@ -474,6 +482,8 @@ def MakeSite(TemplatesText, StaticPartsText, DynamicParts, DynamicPartsText, Con
Categories=Categories,
SiteLang=SiteLang,
Locale=Locale)
HTML = ImgAltToTitle(HTML)
if NoScripts:
HTML = StripTags(HTML, ['script'])
if Minify:

View File

@ -13,8 +13,10 @@ from datetime import datetime
from pathlib import Path
FileExtensions = {
'Pages': ('htm', 'html', 'md', 'pug', 'txt'),
'Tmp': ('md', 'pug', 'txt')}
'Pages': ('htm', 'html', 'markdown', 'md', 'pug', 'txt'),
'HTML': ('.htm', '.html'),
'Markdown': ('.markdown', '.md'),
'Tmp': ('htm', 'markdown', 'md', 'pug', 'txt')}
def ReadFile(p):
try:
@ -40,9 +42,12 @@ def FileToStr(File, Truncate=''):
def IgnoreFiles(Dir, Files):
return [f for f in Files if os.path.isfile(os.path.join(Dir, f))]
def LoadFromDir(Dir, Rglob):
def LoadFromDir(Dir, Matchs):
Contents = {}
for File in Path(Dir).rglob(Rglob):
if type(Matchs) != list:
Matchs = [Matchs]
for Match in Matchs:
for File in Path(Dir).rglob(Match):
File = str(File)[len(Dir)+1:]
Contents.update({File: ReadFile('{}/{}'.format(Dir, File))})
return Contents

11
TODO
View File

@ -1,24 +1,19 @@
- Handle file extensions with any case sensitivity, not just lowercase; currently the bulk of the issue is finding the files on disk
- Test sorting by date for files not starting with date, and dated folders
- Make alt text for images also be title text
- Custom category names showing in header links
- Fix arguments - some are only callable from CLI and not Site.ini
- Fix ordering menu in Site.ini (not working for inner pages)
- Fix Python-Markdown is installed problem (to load our modules)
- Hot-recompile
- Differential recompile
- Feed generation without native libraries
- JSON feeds
- Full XML sitemap
- SCSS support
- Images in post listings
- Fix HTML (and HTM) and TXT input pages
- Fix .HTM input pages
- Fix feed titles and page title ids
- Use filename as page title if it is missing
- Hybrid global+page menu (like on documentation generators)
- Highlight of currently selected menu item
- Choosing templates for all pages in a folder
- Exporting the entire site text as JSON for full-text search tools
- Category-based feeds
- Automatic guessing of .htm/.html extension for declarations of templates and stuff
- Handle file extensions without case-sensitivity
- Proper multi-language support
- Exporting sites to different formats (?) (single-page HTML, PDF, EPUB, ...)