staticoso/Source/Modules/Site.py

531 lines
19 KiB
Python

""" ================================= |
| This file is part of |
| staticoso |
| Just a simple Static Site Generator |
| |
| Licensed under the AGPLv3 license |
| Copyright (C) 2022, OctoSpacc |
| ================================= """
from datetime import datetime
from Libs.bs4 import BeautifulSoup
from Modules.Config import *
from Modules.HTML import *
from Modules.Markdown import *
from Modules.Pug import *
from Modules.Utils import *
# HTML markup for a self-linking section heading. MakeLinkableTitle fills it in
# through str.format with Index (heading level), DashTitle (anchor id, used both
# as the link target and as the element id) and Title (visible heading text).
HTMLSectionTitleLine = '<h{Index} class="SectionHeading"><span class="SectionLink"><a href="#{DashTitle}"><span>»</span></a> </span><span class="SectionTitle" id="{DashTitle}">{Title}</span></h{Index}>'
# Disabled Pug counterpart of the template above; MakeLinkableTitle builds the
# same text inline with an f-string instead.
#PugSectionTitleLine = "{Line[:Index]}{Line[Index:Index+2]}.SectionHeading #[span.SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle {Line[Index+2:]}]"
# Source text of an automatically generated category page. The only placeholder
# is `{Name}`, so it must be formatted with a `Name=` keyword argument.
# NOTE(review): PagePreprocessor only collects `// %`-prefixed lines as page
# metadata, so the three plain `//` lines below look like they are skipped
# rather than parsed -- confirm whether they should read `// % Title: ...` etc.
CategoryPageTemplate = """\
// Title: {Name}
// Type: Page
// Index: True
# {Name}
<div>[staticoso:Category:{Name}]</div>
"""
def DashifyTitle(Title, Done=None):
    """Turn heading text into a dash-separated anchor id.

    Title: heading text; leading and trailing spaces are ignored.
    Done:  anchor ids already taken, handed to UndupeStr together with '-'
           (presumably so a repeated title gets a distinct id -- see UndupeStr).
           Defaults to "none taken".

    Returns whatever UndupeStr produces for the dashified title.
    """
    # Use a fresh list per call rather than a shared `Done=[]` default object,
    # which would leak state between calls if a helper ever mutated it.
    if Done is None:
        Done = []
    return UndupeStr(DashifyStr(Title.strip(' ')), Done, '-')
def MakeLinkableTitle(Line, Title, DashTitle, Type):
    """Build the markup of a heading that carries its own anchor link.

    Line:      the raw source line (only read for Type 'pug').
    Title:     heading in '#'-prefixed form, e.g. '## Text' (only read for 'md').
    DashTitle: anchor id to link to.
    Type:      'md' yields HTML from HTMLSectionTitleLine, 'pug' yields a Pug line.

    Returns the generated markup, or None for any other Type.
    """
    if Type == 'pug':
        # Split the line around the two-character tag name found at the first 'h'.
        Cut = Line.find('h')
        Before, Tag, After = Line[:Cut], Line[Cut:Cut+2], Line[Cut+2:]
        return f"{Before}{Tag}.SectionHeading #[span.SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle {After}]"
    if Type == 'md':
        # The heading level is the number of '#' in the first space-separated token.
        Level = Title.split(' ')[0].count('#')
        Text = Title[Level+1:]
        return HTMLSectionTitleLine.format(Index=Level, DashTitle=DashTitle, Title=Text)
    return None
def GetTitle(FileName, Meta, Titles, Prefer='MetaTitle', BlogName=None):
    """Choose the title of a page from its metadata, its headings or its file name.

    FileName: last-resort title.
    Meta:     page metadata; reads 'Title', 'Categories' and, for 'HTMLTitle',
              also 'HTMLTitle'.
    Titles:   headings found in the body in '#'-prefixed form (may be empty).
    Prefer:   'BodyTitle', 'MetaTitle' or 'HTMLTitle'; sets the lookup order.
    BlogName: when set and the page is in the 'Blog' category, appended
              as ' - <BlogName>'.
    """
    def MetaThenBody():
        # Metadata title, then first body heading, then the file name.
        if Meta['Title']:
            return Meta['Title']
        if Titles:
            return Titles[0].lstrip('#')
        return FileName

    if Prefer == 'BodyTitle':
        if Titles:
            Title = Titles[0].lstrip('#')
        else:
            Title = Meta['Title'] or FileName
    elif Prefer == 'MetaTitle':
        Title = MetaThenBody()
    elif Prefer == 'HTMLTitle':
        Title = Meta['HTMLTitle'] or MetaThenBody()
    if BlogName and 'Blog' in Meta['Categories']:
        Title = Title + ' - ' + BlogName
    return Title
def GetDescription(Meta, BodyDescription, Prefer='MetaDescription'):
    """Pick the page description from metadata or from the body.

    Prefer selects which source wins ('BodyDescription' or 'MetaDescription');
    the other one is the fallback, and '' is returned when both are empty.
    """
    if Prefer == 'BodyDescription':
        Description = BodyDescription or Meta['Description'] or ''
    elif Prefer == 'MetaDescription':
        Description = Meta['Description'] or BodyDescription or ''
    return Description
def GetImage(Meta, BodyImage, Prefer='MetaImage'):
    """Pick the page image from metadata or from the body.

    Prefer selects which source wins ('BodyImage' or 'MetaImage'); the other
    one is the fallback, and '' is returned when both are empty.
    """
    if Prefer == 'BodyImage':
        Image = BodyImage or Meta['Image'] or ''
    elif Prefer == 'MetaImage':
        Image = Meta['Image'] or BodyImage or ''
    return Image
def MakeContentHeader(Meta, Locale, Categories=''):
    """Render the info block of a page (dates and categories) as HTML.

    Meta:       page metadata; reads 'CreatedOn' and 'EditedOn'.
    Locale:     label strings keyed 'CreatedOn', 'EditedOn' and 'Categories'.
    Categories: ready-made Markdown for the category links; left out when empty.

    Returns the block converted through markdown().
    """
    # Markdown only emits a line break when a line ends with two or more
    # spaces; with a single trailing space all fields collapse onto one line.
    HardBreak = ' ' * 2 + '\n'
    Header = ''
    for Field in ('CreatedOn', 'EditedOn'):
        if Meta[Field]:
            Header += f"{Locale[Field]}: {Meta[Field]}" + HardBreak
    if Categories:
        Header += f"{Locale['Categories']}: {Categories}" + HardBreak
    # The break after the last field is not wanted.
    return markdown(Header.rstrip())
def MakeCategoryLine(File, Meta):
    """Return Markdown links to the category pages of every category in Meta.

    Each link points at 'Categories/<name>.html', prefixed with what
    GetPathLevels(File) returns. Yields '' when the page has no categories.
    """
    Names = Meta['Categories']
    if not Names:
        return ''
    return ''.join(
        f" [{Name}]({GetPathLevels(File)}Categories/{Name}.html) "
        for Name in Names)
def GetHTMLPagesList(Pages, BlogName, SiteRoot, PathPrefix, Unite=None, Type='Page', Category=None, For='Menu', MarkdownExts=(), MenuStyle='Default'):
    """Build the HTML list of pages used for menus and category listings.

    Pages:        entries shaped [File, Content, Titles, Meta].
    BlogName, SiteRoot, PathPrefix: forwarded to MakeListTitle for each entry.
    Unite:        extra raw menu items; a truthy item at position i is inserted
                  at index i of the list and printed verbatim.
    Type:         only pages whose Meta['Type'] equals this are listed.
    Category:     when set, only pages having it in Meta['Categories'] are listed.
    For:          listing purpose, checked against Meta['Index'] via CanIndex.
    MarkdownExts: extensions handed to MarkdownHTMLEscape and markdown().
    MenuStyle:    'Default' (nested, with folder names), 'Flat' (single level)
                  or 'Line' (items wrapped in <span>, no folder names).

    Returns the list rendered through markdown().
    """
    ShowPaths, Flatten, SingleLine = True, False, False
    if MenuStyle == 'Flat':
        Flatten = True
    elif MenuStyle == 'Line':
        ShowPaths, SingleLine = False, True

    def Entry(Levels, Title):
        # One list item, in the shape required by the menu style.
        if SingleLine:
            return ' <span>' + Title + '</span> '
        return Levels + Title + '\n'

    List, LastParent = '', []
    # Filter into a new list. Removing items from a list while looping over it
    # skips the element after each removal, which let unindexed pages through
    # to OrderPages where they could displace a real page.
    IndexPages = [
        e for e in Pages
        if e[3]['Index'] not in ('False', 'None') and e[3]['Type'] == Type]
    if Type == 'Page':
        IndexPages = OrderPages(IndexPages)
    # `Unite=None` replaces a shared mutable `[]` default.
    for i, e in enumerate(Unite or ()):
        if e:
            IndexPages.insert(i, [e, None, None, {'Type': Type, 'Index': 'True', 'Order': 'Unite'}])
    for File, Content, Titles, Meta in IndexPages:
        if Meta['Type'] == Type and CanIndex(Meta['Index'], For) and (not Category or Category in Meta['Categories']):
            IsUnite = Meta['Order'] == 'Unite'
            Depth = 1 if IsUnite else (File.count('/') + 1)
            if Depth > 1 and not IsUnite:  # Folder names are handled here
                CurParent = File.split('/')[:-1]
                for i, _ in enumerate(CurParent):
                    if LastParent != CurParent and ShowPaths:
                        LastParent = CurParent
                        Levels = '- ' * ((Depth-1+i) if not Flatten else 1)
                        # Folders with an index file get its title, others the folder name
                        if StripExt(File).endswith('index'):
                            Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, BlogName, PathPrefix)
                        else:
                            Title = CurParent[Depth-2+i]
                        List += Entry(Levels, Title)
            # A folder's index page was already emitted as the folder entry above
            if not (Depth > 1 and StripExt(File).split('/')[-1] == 'index'):
                Levels = '- ' * (Depth if not Flatten else 1)
                if IsUnite:
                    Title = File
                else:
                    Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, BlogName, PathPrefix)
                List += Entry(Levels, Title)
    return markdown(MarkdownHTMLEscape(List, MarkdownExts), extensions=MarkdownExts)
def TemplatePreprocessor(Text):
    """Read template options from `<!-- % Key: Value -->` comment lines.

    Every line that, once stripped, is an HTML comment whose body starts with
    '%' and ends with '-->' contributes one entry to a '[Meta]' config section,
    which is parsed with LoadConfStr/ReadConf. Missing options get defaults.

    Returns the options as a dict (always containing 'MenuStyle').
    """
    Defaults = {
        'MenuStyle': 'Default'}
    Collected = []
    for RawLine in Text.splitlines():
        Stripped = RawLine.strip()
        if not Stripped.startswith('<!--'):
            continue
        Body = Stripped[4:].strip()
        if Body.startswith('%') and Body.endswith('-->'):
            # Drop the leading '%' and the closing '-->'
            Collected.append(Body[1:-3].strip() + '\n')
    Meta = dict(ReadConf(LoadConfStr('[Meta]\n' + ''.join(Collected)), 'Meta'))
    for Key, Value in Defaults.items():
        Meta.setdefault(Key, Value)
    return Meta
def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros, CategoryUncategorized, LightRun=False):
    """Read a source page and extract its content, headings and metadata.

    Lines starting with '//' are directives and are left out of the content:
    '// % Key: Value' adds a metadata entry, '// $ Key: Value' adds a macro.
    In the remaining lines, headings are recorded and rewritten into
    self-linking headings (see MakeLinkableTitle), according to the file type
    taken from the lower-cased path (HTML, Markdown, '.pug' or '.txt').

    Path:                  source file to read.
    Type:                  default page type ('Page' or 'Post').
    SiteTemplate:          default template name.
    SiteRoot, LightRun:    accepted for interface compatibility; not used here.
    GlobalMacros:          site-wide macros merged in before the page's own.
    CategoryUncategorized: category assigned to pages that declare none.

    Returns (Content, Titles, Meta): the rewritten body, the headings found in
    '#'-prefixed form, and the metadata dict completed with defaults.
    """
    File = ReadFile(Path)
    Path = Path.lower()
    Content, Titles, DashyTitles, HTMLTitlesFound, Macros, Meta, MetaDefault = '', [], [], False, '', '', {
        'Template': SiteTemplate,
        'Style': '',
        'Type': Type,
        'Index': 'Unspecified',
        'Feed': 'True',
        'Title': '',
        'HTMLTitle': '',
        'Description': '',
        'Image': '',
        'Macros': {},
        'Categories': [],
        'CreatedOn': '',
        'EditedOn': '',
        'Order': None}
    Headings = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    for l in File.splitlines():
        ll = l.lstrip()
        if ll.startswith('//'):
            lll = ll[2:].lstrip()
            if lll.startswith('%'):
                Meta += lll[1:].lstrip() + '\n'
            elif lll.startswith('$'):
                Macros += lll[1:].lstrip() + '\n'
        else:
            if Path.endswith(FileExtensions['HTML']) and not HTMLTitlesFound:
                # HTML is handled in one go, on the first non-directive line
                Soup = BeautifulSoup(File, 'html.parser')
                Tags = Soup.find_all()
                for t in Tags:
                    if t.name in Headings:
                        Title = '#'*int(t.name[1]) + ' ' + str(t.text)
                        DashTitle = DashifyTitle(Title.lstrip('#'), DashyTitles)
                        DashyTitles += [DashTitle]
                        Titles += [Title]
                        t.replace_with(MakeLinkableTitle(None, Title, DashTitle, 'md'))
                Content = str(Soup.prettify(formatter=None))
                HTMLTitlesFound = True
            elif Path.endswith(FileExtensions['Markdown']):
                if ll.startswith('#') or (ll.startswith('<') and ll[1:].startswith(Headings)):
                    if ll.startswith('#'):
                        Title = ll
                    else:
                        # Inline HTML heading such as `<h2>Text</h2>`. This branch
                        # used an undefined name and always raised NameError; it
                        # now yields the same '## Text' form as a '#' heading.
                        Level = int(ll[2])
                        TagEnd = ll.find('>')
                        Inner = ll[TagEnd+1:] if TagEnd != -1 else ll[3:]
                        Closing = f"</h{Level}>"
                        if Inner.rstrip().endswith(Closing):
                            Inner = Inner.rstrip()[:-len(Closing)]
                        Title = '#'*Level + ' ' + Inner
                    DashTitle = DashifyTitle(MkSoup(Title.lstrip('#')).get_text(), DashyTitles)
                    DashyTitles += [DashTitle]
                    Titles += [Title]
                    Title = MakeLinkableTitle(None, Title, DashTitle, 'md')
                    # Double the space between adjacent tags so that stripping
                    # ' </' below trims a trailing space inside the title but
                    # leaves the intended '> </' separator in place. (Replacing
                    # '> </' with itself did nothing.)
                    Title = Title.replace('> </', '>' + ' ' * 2 + '</')
                    Title = Title.replace(' </', '</')
                    Content += Title + '\n'
                else:
                    Content += l + '\n'
            elif Path.endswith('.pug'):
                if ll.startswith(Headings):
                    if ll[2:].startswith(("(class='NoTitle", '(class="NoTitle')):
                        Content += l + '\n'
                    else:
                        Title = '#'*int(ll[1]) + str(ll[3:])
                        DashTitle = DashifyTitle(Title.lstrip('#'), DashyTitles)
                        DashyTitles += [DashTitle]
                        Titles += [Title]
                        # TODO: We should handle headers that for any reason already have parenthesis
                        # NOTE(review): this only matches a line that is exactly
                        # 'hN(' -- possibly meant `ll[2:].startswith('(')`; confirm.
                        if ll[2:] == '(':
                            Content += l + '\n'
                        else:
                            Content += MakeLinkableTitle(l, Title, DashTitle, 'pug') + '\n'
                else:
                    Content += l + '\n'
            elif Path.endswith('.txt'):
                Content += l + '\n'
    Meta = dict(ReadConf(LoadConfStr('[Meta]\n' + Meta), 'Meta'))
    for i in MetaDefault:
        if i in Meta:
            if i == 'Categories':
                # Categories are written as one space-separated string
                Meta['Categories'] = Meta['Categories'].split(' ')
        else:
            Meta.update({i: MetaDefault[i]})
    if Meta['Index'] in ('Default', 'Unspecified'):
        if not Meta['Categories']:
            Meta['Categories'] = [CategoryUncategorized]
        # Unless stated otherwise, posts are indexed and pages are not
        if Meta['Type'] == 'Page':
            Meta['Index'] = 'False'
        elif Meta['Type'] == 'Post':
            Meta['Index'] = 'True'
    if GlobalMacros:
        Meta['Macros'].update(GlobalMacros)
    # Page macros are applied last, so they override global ones
    Meta['Macros'].update(ReadConf(LoadConfStr('[Macros]\n' + Macros), 'Macros'))
    return Content, Titles, Meta
def PagePostprocessor(FileType, Text, Meta):
    """Normalise macro references from the spaced `[: Name :]` form to `[:Name:]`.

    Runs ReplWithEsc once per macro name in Meta['Macros'] and returns the
    resulting text. FileType is accepted but not used.
    """
    Result = Text
    for Name in Meta['Macros']:
        Spaced, Compact = f"[: {Name} :]", f"[:{Name}:]"
        Result = ReplWithEsc(Result, Spaced, Compact)
    return Result
def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix=''):
    """Build the Markdown text of one page entry for a pages list.

    The title comes from GetTitle. Unless Meta['Index'] is 'Unlinked' it is
    turned into a link to '<PathPrefix><File without extension>.html'. Posts
    are additionally prefixed with '[<CreatedOn>]', or '[?]' when unknown.
    SiteRoot is accepted but not used.
    """
    Title = GetTitle(File.split('/')[-1], Meta, Titles, Prefer, BlogName)
    if Meta['Index'] != 'Unlinked':
        Target = f"{PathPrefix}{StripExt(File)}.html"
        Title = f"[{Title}]({Target})"
    if Meta['Type'] == 'Post':
        Date = Meta['CreatedOn'] or '?'
        Title = f"[{Date}] {Title}"
    return Title
def FormatTitles(Titles, Flatten=False):
    """Render the headings of a page as an HTML list of anchor links.

    Titles:  headings in '#'-prefixed form; the number of '#' in the first
             space-separated token sets the nesting level.
    Flatten: put every entry on the first level.

    Returns the list converted through markdown().
    """
    # TODO: Somehow titles written in Pug can end up here and don't work, they should be handled
    Entries, UsedAnchors = [], []
    for Heading in Titles:
        Level = Heading.split(' ')[0].count('#')
        Bullets = '- ' * (1 if Flatten else Level)
        Text = MkSoup(Heading.lstrip('#')).get_text()
        # Anchors already handed out are passed along with every new request
        Anchor = DashifyTitle(Text, UsedAnchors)
        UsedAnchors.append(Anchor)
        Entries.append(f"{Bullets}[{Text}](#{Anchor})\n")
    return markdown(''.join(Entries))
def OrderPages(Old):
    """Sort pages by their explicit 'Order' metadata.

    Old: entries shaped [File, Content, Titles, Meta].

    Returns a new list: pages with a truthy Meta['Order'] first, by ascending
    int(Order), then the pages without one in their original order. Pages
    sharing an Order value are all kept, in their original relative order.
    """
    Ordered, NoOrder = [], []
    for Page in Old:
        (Ordered if Page[3]['Order'] else NoOrder).append(Page)
    # A stable sort keeps every page. Placing pages into slots indexed by Order
    # let a later page silently overwrite an earlier one with the same value.
    Ordered.sort(key=lambda Page: int(Page[3]['Order']))
    return Ordered + NoOrder
def CanIndex(Index, For):
    """Tell whether a page with the given 'Index' setting belongs in a listing.

    'False'/'None' never do, 'True'/'All'/'Unlinked' always do; any other
    value must equal the listing purpose `For`.
    """
    Never = ('False', 'None')
    Always = ('True', 'All', 'Unlinked')
    if Index in Never:
        return False
    if Index in Always:
        return True
    return bool(Index == For)
def _ReplaceMacrosFoldersCategories(Text, Meta, FolderRoots, Categories):
    # Expand page macros, folder-root tokens and category-list tokens in Text.
    for e in Meta['Macros']:
        Text = ReplWithEsc(Text, f"[:{e}:]", Meta['Macros'][e])
    for e in FolderRoots:
        Text = ReplWithEsc(Text, f"[staticoso:Folder:{e}:AbsoluteRoot]", FolderRoots[e])
    for e in Categories:
        # The <span>-wrapped form goes first so the wrapper is replaced with it
        Text = ReplWithEsc(Text, f"<span>[staticoso:Category:{e}]</span>", Categories[e])
        Text = ReplWithEsc(Text, f"[staticoso:Category:{e}]", Categories[e])
    return Text

def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale, LightRun):
    """Fill a template with a page's content and expand all staticoso tokens.

    File:             source path of the page (decides '.txt' handling and title fallback).
    HTML:             template text.
    StaticPartsText:  name -> text for '[staticoso:StaticPart:<name>]' tokens.
    DynamicParts:     section -> part name or list of part names.
    DynamicPartsText: '<path>/<part>' -> text of each dynamic part.
    HTMLPagesList:    rendered site menu.
    PagePath:         output path of the page, relative to the output directory.
    Content, Titles, Meta: the page body (already HTML), its headings, its metadata.
    FolderRoots, Categories: name -> replacement text for folder and category tokens.
    LightRun:         when true the full page is not produced.

    Returns (HTML, ContentHTML, Description, Image): the complete page (None on
    a light run), the stand-alone content with its tokens expanded, and the
    description and image chosen for the page.
    """
    HTMLTitles = FormatTitles(Titles)
    BodyDescription, BodyImage = '', ''
    if not File.lower().endswith('.txt'):
        Soup = BeautifulSoup(Content, 'html.parser')
        if Soup.p:
            BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
        # Tag.get() returns None for a missing attribute, whereas indexing an
        # <img> that has no src attribute raises KeyError.
        if Soup.img and Soup.img.get('src'):
            BodyImage = Soup.img.get('src')
        #Content = SquareFnrefs(Content)
        Content = AddToTagStartEnd(Content, '<a class="footnote-ref"', '</a>', '[', ']')
    Title = GetTitle(File.split('/')[-1], Meta, Titles, 'MetaTitle', BlogName)
    Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
    Image = GetImage(Meta, BodyImage, 'MetaImage')
    # A dynamic part token must stand alone on its (stripped) template line
    for Line in HTML.splitlines():
        Line = Line.strip()
        if Line.startswith('[staticoso:DynamicPart:') and Line.endswith(']'):
            PartPath = Line[len('[staticoso:DynamicPart:'):-1]
            Section = PartPath.split('/')[-1]
            Text = ''
            if Section in DynamicParts:
                Part = DynamicParts[Section]
                if isinstance(Part, list):
                    for e in Part:
                        Text += DynamicPartsText[f"{PartPath}/{e}"] + '\n'
                elif isinstance(Part, str):
                    Text = DynamicPartsText[f"{PartPath}/{Part}"]
            HTML = ReplWithEsc(HTML, f"[staticoso:DynamicPart:{PartPath}]", Text)
    for e in StaticPartsText:
        HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])
    if LightRun:
        HTML = None
    else:
        HTML = DictReplWithEsc(
            HTML, {
                '[staticoso:Site:Menu]': HTMLPagesList,
                '[staticoso:Page:Lang]': SiteLang,
                '[staticoso:Page:Chapters]': HTMLTitles,
                '[staticoso:Page:Title]': Title,
                '[staticoso:Page:Description]': Description,
                '[staticoso:Page:Image]': Image,
                '[staticoso:Page:Path]': PagePath,
                '[staticoso:Page:Style]': Meta['Style'],
                '[staticoso:Page:Content]': Content,
                '[staticoso:Page:ContentInfo]': MakeContentHeader(Meta, Locale, MakeCategoryLine(File, Meta)),
                '[staticoso:BuildTime]': datetime.now().strftime('%Y-%m-%d %H:%M'),
                '[staticoso:Site:Name]': SiteName,
                '[staticoso:Site:AbsoluteRoot]': SiteRoot,
                '[staticoso:Site:RelativeRoot]': GetPathLevels(PagePath)
            })
        HTML = _ReplaceMacrosFoldersCategories(HTML, Meta, FolderRoots, Categories)
    # The bare content gets its own, smaller set of substitutions
    ContentHTML = DictReplWithEsc(
        Content, {
            '[staticoso:Page:Title]': Title,
            '[staticoso:Page:Description]': Description,
            '[staticoso:Site:Name]': SiteName,
            '[staticoso:Site:AbsoluteRoot]': SiteRoot,
            '[staticoso:Site:RelativeRoot]': GetPathLevels(PagePath)
        })
    ContentHTML = _ReplaceMacrosFoldersCategories(ContentHTML, Meta, FolderRoots, Categories)
    return HTML, ContentHTML, Description, Image
def MakeSite(OutputDir, LimitFiles, TemplatesText, StaticPartsText, DynamicParts, DynamicPartsText, ConfMenu, GlobalMacros, SiteName, BlogName, SiteTagline, SiteTemplate, SiteDomain, SiteRoot, FolderRoots, SiteLang, Locale, Minify, MinifyKeepComments, NoScripts, ImgAltToTitle, ImgTitleToAlt, Sorting, MarkdownExts, AutoCategories, CategoryUncategorized):
    """Build every page and post of the site and write the HTML output.

    Steps: collect the sources under 'Pages' and 'Posts', preprocess them,
    compile Pug files, build the per-category listings (and, with
    AutoCategories, the missing category pages), then patch each page into its
    template, post-process it and write it under OutputDir.

    LimitFiles: False to build everything; otherwise the collection of page
                paths to build in full, all others getting a "light run" in
                which the complete HTML is neither produced nor written.
    ConfMenu:   menu items; entries naming a generated page are set to None
                in place (the page is listed through its own entry instead).
    Sorting:    {'Pages': ..., 'Posts': ...}; 'Inverse' reverses that listing.

    Returns a list of
    [File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image]
    entries, one per page; SlimHTML is None on a light run.
    """
    PagesPaths, PostsPaths, Pages, MadePages, Categories = [], [], [], [], {}
    for Ext in FileExtensions['Pages']:
        for File in Path('Pages').rglob(f"*.{Ext}"):
            PagesPaths += [FileToStr(File, 'Pages/')]
        for File in Path('Posts').rglob(f"*.{Ext}"):
            PostsPaths += [FileToStr(File, 'Posts/')]
    PagesPaths = FileNameDateSort(PagesPaths)
    if Sorting['Pages'] == 'Inverse':
        PagesPaths.reverse()
    PostsPaths = FileNameDateSort(PostsPaths)
    if Sorting['Posts'] == 'Inverse':
        PostsPaths.reverse()

    print("[I] Preprocessing Source Pages")
    for Type in ['Page', 'Post']:
        if Type == 'Page':
            Files = PagesPaths
            PathPrefix = ''
        elif Type == 'Post':
            Files = PostsPaths
            PathPrefix = 'Posts/'
        for File in Files:
            TempPath = f"{PathPrefix}{File}"
            LightRun = not (LimitFiles == False or TempPath in LimitFiles)
            Content, Titles, Meta = PagePreprocessor(f"{Type}s/{File}", Type, SiteTemplate, SiteRoot, GlobalMacros, CategoryUncategorized, LightRun=LightRun)
            Pages += [[TempPath, Content, Titles, Meta]]
            for Cat in Meta['Categories']:
                Categories[Cat] = ''
    PugCompileList(OutputDir, Pages, LimitFiles)

    if Categories:
        print("[I] Generating Category Lists")
        for Cat in Categories:
            for Type in ('Page', 'Post'):
                Categories[Cat] += GetHTMLPagesList(
                    Pages=Pages,
                    BlogName=BlogName,
                    SiteRoot=SiteRoot,
                    PathPrefix=GetPathLevels('Categories/'),
                    Type=Type,
                    Category=Cat,
                    For='Categories',
                    MarkdownExts=MarkdownExts,
                    MenuStyle='Flat')

    if AutoCategories:
        Dir = f"{OutputDir}/Categories"
        for Cat in Categories:
            # Any file named after the category counts as an existing page
            Exists = any(True for _ in Path(Dir).rglob(str(Cat)+'.*'))
            if not Exists:
                File = f"Categories/{Cat}.md"
                FilePath = f"{OutputDir}/{File}"
                # The template's placeholder is {Name}; formatting it with a
                # Title= keyword raised KeyError('Name').
                WriteFile(FilePath, CategoryPageTemplate.format(Name=Cat))
                # Work out LightRun for this file instead of reusing whatever
                # value the preprocessing loop above happened to leave behind.
                LightRun = not (LimitFiles == False or File in LimitFiles)
                Content, Titles, Meta = PagePreprocessor(FilePath, 'Page', SiteTemplate, SiteRoot, GlobalMacros, CategoryUncategorized, LightRun=LightRun)
                Pages += [[File, Content, Titles, Meta]]

    # Menu entries that name a generated page are dropped from the raw menu
    for i, e in enumerate(ConfMenu):
        for File, Content, Titles, Meta in Pages:
            File = StripExt(File)+'.html'
            if e == File:
                ConfMenu[i] = None

    print("[I] Writing Pages")
    for File, Content, Titles, Meta in Pages:
        LightRun = not (LimitFiles == False or File in LimitFiles)
        PagePath = f"{OutputDir}/{StripExt(File)}.html"
        LowerFile = File.lower()
        if LowerFile.endswith(FileExtensions['Markdown']):
            Content = markdown(PagePostprocessor('md', Content, Meta), extensions=MarkdownExts)
        elif LowerFile.endswith(('.pug')):
            # Read back from the output path, where the compiled page is expected
            Content = PagePostprocessor('pug', ReadFile(PagePath), Meta)
        elif LowerFile.endswith(('.txt')):
            Content = '<pre>' + html.escape(Content) + '</pre>'
        elif LowerFile.endswith(FileExtensions['HTML']):
            Content = ReadFile(PagePath)
        if LightRun:
            HTMLPagesList = None
        else:
            TemplateMeta = TemplatePreprocessor(TemplatesText[Meta['Template']])
            HTMLPagesList = GetHTMLPagesList(
                Pages=Pages,
                BlogName=BlogName,
                SiteRoot=SiteRoot,
                PathPrefix=GetPathLevels(File),
                Unite=ConfMenu,
                Type='Page',
                For='Menu',
                MarkdownExts=MarkdownExts,
                MenuStyle=TemplateMeta['MenuStyle'])
        HTML, ContentHTML, Description, Image = PatchHTML(
            File=File,
            HTML=TemplatesText[Meta['Template']],
            StaticPartsText=StaticPartsText,
            DynamicParts=DynamicParts,
            DynamicPartsText=DynamicPartsText,
            HTMLPagesList=HTMLPagesList,
            PagePath=PagePath[len(f"{OutputDir}/"):],
            Content=Content,
            Titles=Titles,
            Meta=Meta,
            SiteRoot=SiteRoot,
            SiteName=SiteName,
            BlogName=BlogName,
            FolderRoots=FolderRoots,
            Categories=Categories,
            SiteLang=SiteLang,
            Locale=Locale,
            LightRun=LightRun)
        # HTML is None on a light run, so only ContentHTML is post-processed then
        if Minify:
            if not LightRun:
                HTML = DoMinifyHTML(HTML, MinifyKeepComments)
            ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments)
        if NoScripts:
            if not LightRun:
                HTML = StripTags(HTML, ['script'])
            ContentHTML = StripTags(ContentHTML, ['script'])
        if ImgAltToTitle or ImgTitleToAlt:
            if not LightRun:
                HTML = WriteImgAltAndTitle(HTML, ImgAltToTitle, ImgTitleToAlt)
            ContentHTML = WriteImgAltAndTitle(ContentHTML, ImgAltToTitle, ImgTitleToAlt)
        if LightRun:
            SlimHTML = None
        else:
            SlimHTML = HTMLPagesList + ContentHTML
            WriteFile(PagePath, HTML)
        MadePages += [[File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image]]
    return MadePages