Filename as title for untitled pages, title attr. addition, add TXT input pages + file formats fixes

2025-06-05 22:09:23 +02:00 · 2022-08-14 17:35:58 +02:00
parent 1faf0014be
commit be37e2d845
7 changed files with 69 additions and 40 deletions
--- a/README.md
+++ b/README.md
@ -7,7 +7,12 @@ I'm making this because I need a simple and kind-of-minimal program to serve me
 This won't replace any of the big projects out there that do the same thing but, as all of my projects, I'm releasing it as free libre software, in the hope that someone would find it useful.  
 Also, this software is needed for someone to edit and compile my personal sub-website [sitoctt](https://gitlab.com/octtspacc/sitoctt) from its source. Being that site too released under a libre license that allows modifications, I have to also release the tools I use to build it.
-Feel free to experiment with all of this stuff!
+Everything is still a heavy WIP, and features might break across commits, but feel free to experiment with all of this stuff!
 ## Documentation
 Documentation can be found at [staticoso-docs.gitlab.io](https://staticoso-docs.gitlab.io).  
 Obviously, it's built with staticoso itself 😁️. Its source repo can be found at [gitlab.com/octtspacc/staticoso-docs](https://gitlab.com/octtspacc/staticoso-docs).
 ## Dependencies
 - [Python == 3.10.4](https://python.org)
@ -22,6 +27,10 @@ Feel free to experiment with all of this stuff!
 - [html2gmi](https://github.com/LukeEmmet/html2gmi)
 ## Features roadmap
 - [ ] Configuration with both INI files and CLI arguments
 - [ ] Category-based feeds
 - [ ] Support for multi-language sites
 - [x] The `title` attribute added to images which only have `alt` (for desktop accessibility)
 - [x] Local (per-page) and global (per-site) macros
 - [x] ActivityPub (Mastodon) support (Feed + embedded comments)
 - [ ] Polished Gemtext generation
@ -44,5 +53,5 @@ Feel free to experiment with all of this stuff!
 - [x] Generation of titles in right sidebar with clickable links
 - [x] Detections of titles in a page
 - [x] Custom static page parts by template
- [x] _HTML_, _Extended Markdown_, and _Pug_ supported for input page files
+- [x] _HTML_, TXT, _Extended Markdown_, and _Pug_ supported for input page files
 - [x] Ready for use
--- a/Source/Build.py
+++ b/Source/Build.py
@ -63,7 +63,7 @@ def GetConfMenu(Entries, MarkdownExts):
 		for i in Entries:
 			e = Entries[i]
 			if not ((e.startswith('<') or e.startswith('[') or e.startswith('- ')) and (e.endswith('>') or e.endswith(')') or e.endswith('}'))):
-				if not e.lower().endswith('.html'):
+				if not (e.lower().endswith('.html') or e.lower().endswith('.htm')):
 					e += '.html'
 			Menu[int(i)] = e
 	return Menu
@ -116,10 +116,10 @@ def Main(Args, FeedEntries):
 	print("[I] Generating HTML")
 	Pages = MakeSite(
-		TemplatesText=LoadFromDir('Templates', '*.html'),
+		TemplatesText=LoadFromDir('Templates', ['*.htm', '*.html']),
-		StaticPartsText=LoadFromDir('StaticParts', '*.html'),
+		StaticPartsText=LoadFromDir('StaticParts', ['*.htm', '*.html']),
 		DynamicParts=literal_eval(Args.DynamicParts) if Args.DynamicParts else {},
-		DynamicPartsText=LoadFromDir('DynamicParts', '*.html'),
+		DynamicPartsText=LoadFromDir('DynamicParts', ['*.htm', '*.html']),
 		ConfMenu=ConfMenu,
 		GlobalMacros=ReadConf(SiteConf, 'Macros'),
 		SiteName=SiteName,
@ -190,8 +190,10 @@ def Main(Args, FeedEntries):
 			Pages,
 			Header=Args.GemtextHeader if Args.GemtextHeader else f"# {SiteName}\n\n" if SiteName else '')
-	print("[I] Last Steps")
+	print("[I] Cleaning Temporary Files")
 	DelTmp()
 	print("[I] Copying Assets")
 	os.system("cp -R Assets/* public/")
 	print("[I] Done!")
--- a/Source/Modules/HTML.py
+++ b/Source/Modules/HTML.py
@ -32,7 +32,7 @@ def StripAttrs(HTML):
 			t.attrs = {}
 	return str(Soup)
-def StripTags(HTML, ToStrip):
+def StripTags(HTML, ToStrip): # Remove desired tags from the HTML
 	Soup = MkSoup(HTML)
 	Tags = Soup.find_all()
 	for t in Tags:
@ -40,6 +40,14 @@ def StripTags(HTML, ToStrip):
 			t.replace_with('')
 	return str(Soup)
 def ImgAltToTitle(HTML): # Adds title attr. to <img> which don't have it, but have alt text
 	Soup = MkSoup(HTML)
 	Tags = Soup.find_all('img')
 	for t in Tags:
 		if 'alt' in t.attrs and 'title' not in t.attrs:
 			t.attrs.update({'title': t.attrs['alt']})
 	return str(Soup)
 def AddToTagStartEnd(HTML, MatchStart, MatchEnd, AddStart, AddEnd): # This doesn't handle nested tags
 	StartPos = None
 	for i,e in enumerate(HTML):
--- a/Source/Modules/Pug.py
+++ b/Source/Modules/Pug.py
@ -16,7 +16,7 @@ def PugCompileList(Pages):
 	# Pug-cli seems to shit itself with folder paths as input, so we pass ALL the files as arguments
 	Paths = ''
 	for File, Content, Titles, Meta in Pages:
-		if File.endswith('.pug'):
+		if File.lower().endswith('.pug'):
 			Path = 'public/{}'.format(File)
 			WriteFile(Path, Content)
 			Paths += '"{}" '.format(Path)
--- a/Source/Modules/Site.py
+++ b/Source/Modules/Site.py
@ -30,13 +30,13 @@ def MakeLinkableTitle(Line, Title, DashTitle, Type):
 		NewLine += Line[Index+2:]
 		return NewLine
-def GetTitle(Meta, Titles, Prefer='MetaTitle', BlogName=None):
+def GetTitle(FileName, Meta, Titles, Prefer='MetaTitle', BlogName=None):
 	if Prefer == 'BodyTitle':
-		Title = Titles[0].lstrip('#') if Titles else Meta['Title'] if Meta['Title'] else 'Untitled'
+		Title = Titles[0].lstrip('#') if Titles else Meta['Title'] if Meta['Title'] else FileName
 	elif Prefer == 'MetaTitle':
-		Title = Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else 'Untitled'
+		Title = Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else FileName
 	elif Prefer == 'HTMLTitle':
-		Title = Meta['HTMLTitle'] if Meta['HTMLTitle'] else Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else 'Untitled'
+		Title = Meta['HTMLTitle'] if Meta['HTMLTitle'] else Meta['Title'] if Meta['Title'] else Titles[0].lstrip('#') if Titles else FileName
 	if BlogName and 'Blog' in Meta['Categories']:
 		Title += ' - ' + BlogName
 	return Title
@ -140,6 +140,7 @@ def TemplatePreprocessor(Text):
 def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
 	File = ReadFile(Path)
 	Path = Path.lower()
 	Content, Titles, DashyTitles, HTMLTitlesFound, Macros, Meta, MetaDefault = '', [], [], False, '', '', {
 		'Template': SiteTemplate,
 		'Style': '',
@ -165,7 +166,7 @@ def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
 				Macros += lll[1:].lstrip() + '\n'
 		else:
 			Headings = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
-			if Path.endswith('.html') and not HTMLTitlesFound:
+			if Path.endswith(FileExtensions['HTML']) and not HTMLTitlesFound:
 				Soup = BeautifulSoup(File, 'html.parser')
 				Tags = Soup.find_all()
 				for t in Tags:
@ -177,7 +178,7 @@ def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
 						t.replace_with(MakeLinkableTitle(None, Title, DashTitle, 'md'))
 				Content = str(Soup.prettify(formatter=None))
 				HTMLTitlesFound = True
-			elif Path.endswith('.md'):
+			elif Path.endswith(FileExtensions['Markdown']):
 				if ll.startswith('#'):
 					DashTitle = DashifyTitle(l.lstrip('#'), DashyTitles)
 					DashyTitles += [DashTitle]
@ -201,6 +202,8 @@ def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros):
 							Content += MakeLinkableTitle(l, Title, DashTitle, 'pug') + '\n'
 				else:
 					Content += l + '\n'
 			elif Path.endswith('.txt'):
 				Content += l + '\n'
 	Meta = dict(ReadConf(LoadConfStr('[Meta]\n' + Meta), 'Meta'))
 	for i in MetaDefault:
 		if i in Meta:
@ -229,7 +232,7 @@ def PagePostprocessor(FileType, Text, Meta):
 	return Text
 def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix=''):
-	Title = GetTitle(Meta, Titles, Prefer, BlogName)
+	Title = GetTitle(File.split('/')[-1], Meta, Titles, Prefer, BlogName)
 	Link = False if Meta['Index'] == 'Unlinked' else True
 	if Link:
 		Title = '[{}]({})'.format(
@ -282,17 +285,18 @@ def CanIndex(Index, For):
 def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale):
 	HTMLTitles = FormatTitles(Titles)
 	BodyDescription, BodyImage = '', ''
-	Soup = BeautifulSoup(Content, 'html.parser')
+	if not File.lower().endswith('.txt'):
 		Soup = BeautifulSoup(Content, 'html.parser')
-	if not BodyDescription and Soup.p:
+		if not BodyDescription and Soup.p:
-		BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
+			BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
-	if not BodyImage and Soup.img and Soup.img['src']:
+		if not BodyImage and Soup.img and Soup.img['src']:
-		BodyImage = Soup.img['src']
+			BodyImage = Soup.img['src']
-	#Content = SquareFnrefs(Content)
+		#Content = SquareFnrefs(Content)
-	Content = AddToTagStartEnd(Content, '<a class="footnote-ref"', '</a>', '[', ']')
+		Content = AddToTagStartEnd(Content, '<a class="footnote-ref"', '</a>', '[', ']')
-	Title = GetTitle(Meta, Titles, 'MetaTitle', BlogName)
+	Title = GetTitle(File.split('/')[-1], Meta, Titles, 'MetaTitle', BlogName)
 	Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
 	Image = GetImage(Meta, BodyImage, 'MetaImage')
@ -439,10 +443,14 @@ def MakeSite(TemplatesText, StaticPartsText, DynamicParts, DynamicPartsText, Con
 	print("[I] Writing Pages")
 	for File, Content, Titles, Meta in Pages:
 		PagePath = 'public/{}.html'.format(StripExt(File))
-		if File.endswith('.md'):
+		if File.lower().endswith(('.markdown', '.md')):
 			Content = markdown(PagePostprocessor('md', Content, Meta), extensions=MarkdownExts)
-		elif File.endswith(('.pug')):
+		elif File.lower().endswith(('.pug')):
 			Content = PagePostprocessor('pug', ReadFile(PagePath), Meta)
 		elif File.lower().endswith(('.txt')):
 			Content = '<pre>' + Content + '</pre>'
 		elif File.lower().endswith(('.htm', '.html')):
 			Content = ReadFile(PagePath)
 		TemplateMeta = TemplatePreprocessor(TemplatesText[Meta['Template']])
 		HTMLPagesList = GetHTMLPagesList(
@ -474,6 +482,8 @@ def MakeSite(TemplatesText, StaticPartsText, DynamicParts, DynamicPartsText, Con
 			Categories=Categories,
 			SiteLang=SiteLang,
 			Locale=Locale)
 		HTML = ImgAltToTitle(HTML)
 		if NoScripts:
 			HTML = StripTags(HTML, ['script'])
 		if Minify:
--- a/Source/Modules/Utils.py
+++ b/Source/Modules/Utils.py
@ -13,8 +13,10 @@ from datetime import datetime
 from pathlib import Path
 FileExtensions = {
-	'Pages': ('htm', 'html', 'md', 'pug', 'txt'),
+	'Pages': ('htm', 'html', 'markdown', 'md', 'pug', 'txt'),
-	'Tmp': ('md', 'pug', 'txt')}
+	'HTML': ('.htm', '.html'),
 	'Markdown': ('.markdown', '.md'),
 	'Tmp': ('htm', 'markdown', 'md', 'pug', 'txt')}
 def ReadFile(p):
 	try:
@ -40,11 +42,14 @@ def FileToStr(File, Truncate=''):
 def IgnoreFiles(Dir, Files):
    return [f for f in Files if os.path.isfile(os.path.join(Dir, f))]
-def LoadFromDir(Dir, Rglob):
+def LoadFromDir(Dir, Matchs):
 	Contents = {}
-	for File in Path(Dir).rglob(Rglob):
+	if type(Matchs) != list:
-		File = str(File)[len(Dir)+1:]
+		Matchs = [Matchs]
-		Contents.update({File: ReadFile('{}/{}'.format(Dir, File))})
+	for Match in Matchs:
 		for File in Path(Dir).rglob(Match):
 			File = str(File)[len(Dir)+1:]
 			Contents.update({File: ReadFile('{}/{}'.format(Dir, File))})
 	return Contents
 def StripExt(Path):
--- a/11
+++ b/11
@ -1,24 +1,19 @@
 - Handle file extensions with any case sensitivity, not just lowercase; currently the bulk of the issue is finding the files on disk
 - Test sorting by date for files not starting with date, and dated folders
 - Make alt text for images also be title text
 - Custom category names showing in header links
 - Fix arguments - some are only callable from CLI and not Site.ini
 - Fix ordering menu in Site.ini (not working for inner pages)
 - Fix Python-Markdown is installed problem (to load our modules)
 - Hot-recompile
 - Differential recompile
 - Feed generation without native libraries
 - JSON feeds
 - Full XML sitemap
 - SCSS support
 - Images in post listings
- Fix HTML (and HTM) and TXT input pages
+- Fix .HTM input pages
 - Fix feed titles and page title ids
 - Use filename as page title if it is missing
 - Hybrid global+page menu (like on documentation generators)
 - Highlight of currently selected menu item
 - Choosing templates for all pages in a folder
 - Exporting the entire site text as JSON for full-text search tools
 - Category-based feeds
 - Automatic guessing of .htm/.html extension for declarations of templates and stuff
 - Handle file extensions without case-sensitivity
 - Proper multi-language support
 - Exporting sites to different formats (?) (single-page HTML, PDF, EPUB, ...)