""" ================================= | | This file is part of | | staticoso | | Just a simple Static Site Generator | | | | Licensed under the AGPLv3 license | | Copyright (C) 2022, OctoSpacc | | ================================= """ import shutil from datetime import datetime from multiprocessing import Pool, cpu_count from Libs.bs4 import BeautifulSoup from Modules.Config import * from Modules.Elements import * from Modules.HTML import * from Modules.Logging import * from Modules.Markdown import * from Modules.Pug import * from Modules.Utils import * # Menu styles: # - Simple: Default, Flat, Line # - Others: Excerpt, Image, Preview (Excerpt + Image), Full def GetHTMLPagesList(Pages, BlogName, SiteRoot, PathPrefix, CallbackFile=None, Unite=[], Type=None, Limit=None, PathFilter='', Category=None, For='Menu', MarkdownExts=(), MenuStyle='Default', ShowPaths=True): Flatten, SingleLine, DoneCount, PrevDepth = False, False, 0, 0 if MenuStyle == 'Flat': Flatten = True elif MenuStyle == 'Line': ShowPaths, SingleLine = False, True List, ToPop, LastParent = '', [], [] IndexPages = Pages.copy() for e in IndexPages: if e[3]['Index'] == 'False' or e[3]['Index'] == 'None': IndexPages.remove(e) for i,e in enumerate(IndexPages): if Type and e[3]['Type'] != Type: ToPop += [i] ToPop = RevSort(ToPop) for i in ToPop: IndexPages.pop(i) if Type == 'Page': IndexPages = OrderPages(IndexPages) for i,e in enumerate(Unite): if e: IndexPages.insert(i,[e,None,None,{'Type':Type,'Index':'True','Order':'Unite'}]) for File, Content, Titles, Meta in IndexPages: # Allow for the virtual "Pages/" prefix to be used in path filtering TmpPathFilter = PathFilter if TmpPathFilter.startswith('Pages/'): TmpPathFilter = TmpPathFilter[len('Pages/'):] if File.startswith('Posts/'): continue if (not Type or (Meta['Type'] == Type and CanIndex(Meta['Index'], For))) and (not Category or Category in Meta['Categories']) and File.startswith(TmpPathFilter) and File != CallbackFile and (not Limit or Limit > DoneCount): 
Depth = (File.count('/') + 1) if Meta['Order'] != 'Unite' else 1 # Folder names are handled here if Depth > 1 and Meta['Order'] != 'Unite': CurParent = File.split('/')[:-1] for i,s in enumerate(CurParent): if LastParent != CurParent and ShowPaths: LastParent = CurParent Levels = '.' * ((Depth-2+i) if not Flatten else 0) + ':' # If search node endswith index, it's a page; else, it's a folder if StripExt(File).endswith('index'): Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, BlogName, PathPrefix) DoneCount += 1 else: Title = CurParent[Depth-2+i] if SingleLine: List += ' ' + Title + ' ' else: List += Levels + Title + '\n' # Pages with any other path if not (Depth > 1 and StripExt(File).split('/')[-1] == 'index'): Levels = '.' * ((Depth-1) if not Flatten else 0) + ':' DoneCount += 1 if Meta['Order'] == 'Unite': Title = markdown(MarkdownHTMLEscape(File, MarkdownExts), extensions=MarkdownExts).removeprefix('
').removesuffix('
')
else:
Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, BlogName, PathPrefix)
if SingleLine:
List += ' ' + Title + ' '
else:
List += Levels + Title + '\n'
if MenuStyle in ('Default', 'Flat'):
return GenHTMLTreeList(List)
elif MenuStyle in ('Line', 'Excerpt', 'Image', 'Preview', 'Full'):
return List
def CheckHTMLCommentLine(Line):
    """Return the body of a single-line HTML comment, or None.

    Line is expected to be already stripped of surrounding whitespace.
    When it opens with '<!--' and closes with '-->', the text following
    the opener is returned with leading whitespace removed; the closing
    '-->' is kept in the returned string (callers slice it off
    themselves). Any other line, including an unterminated comment,
    yields None.
    """
    Opener, Closer = '<!--', '-->'
    # An empty prefix would match every string, so the opener must be explicit
    if not Line.startswith(Opener):
        return None
    Body = Line[len(Opener):].lstrip()
    if Body.endswith(Closer):
        return Body
    return None
def TemplatePreprocessor(Text):
    """Collect template-level metadata declared in HTML comment lines.

    Every line of Text is stripped and passed to CheckHTMLCommentLine.
    When the result starts with '%', its first character and its last
    three characters are dropped and the remainder is added, one entry
    per line, under a '[Meta]' section that is parsed with
    LoadConfStr/ReadConf. Keys missing from the parsed result receive
    their default value ('MenuStyle' -> 'Default').

    Returns the resulting metadata as a dict.
    """
    Defaults = {'MenuStyle': 'Default'}
    Collected = ''
    for RawLine in Text.splitlines():
        Comment = CheckHTMLCommentLine(RawLine.strip())
        if Comment and Comment.startswith('%'):
            # [1:-3] removes the '%' marker and the three trailing characters
            Collected += Comment[1:-3].strip() + '\n'
    Meta = dict(ReadConf(LoadConfStr('[Meta]\n' + Collected), 'Meta'))
    for Key, Value in Defaults.items():
        Meta.setdefault(Key, Value)
    return Meta
def FindPreprocLine(Line, Meta, Macros):
    """Detect a preprocessor directive on one line and accumulate it.

    A line is a candidate when, once stripped, it starts with '//' or is
    reported as a comment by CheckHTMLCommentLine. The candidate's first
    two characters are skipped; if what follows starts with '%', the rest
    is appended to Meta, and if it starts with '$', the rest is appended
    to Macros (each followed by a newline).

    Returns a (Meta, Macros, Changed) tuple, where Changed tells whether
    the line was consumed as a directive.
    """
    Stripped = Line.strip()
    HTMLComment = CheckHTMLCommentLine(Stripped)
    if not (Stripped.startswith('//') or HTMLComment):
        return (Meta, Macros, False)
    Directive = Stripped[2:].lstrip()
    IsMeta = Directive.startswith('%')
    IsMacro = (not IsMeta) and Directive.startswith('$')
    if IsMeta:
        Meta += Directive[1:].lstrip() + '\n'
    elif IsMacro:
        Macros += Directive[1:].lstrip() + '\n'
    # TODO: multi-line comment and code blocks are not tracked yet
    return (Meta, Macros, IsMeta or IsMacro)
def PagePreprocessor(Path, TempPath, Type, SiteTemplate, SiteRoot, GlobalMacros, CategoryUncategorized, LightRun=False):
    """Read a source page and split it into content, titles and metadata.

    The file at Path is read with ReadFile and scanned line by line.
    Lines recognised by FindPreprocLine as directives feed the page's
    Meta / Macros text and are left out of the content. The remaining
    lines are handled according to the (lowercased) file extension:

    - HTML: the whole file is parsed once with BeautifulSoup, heading
      tags are replaced with linkable titles, and the prettified markup
      (minus directive lines) becomes the content.
    - Markdown: '#' headings and '<hN' headings are made linkable, a
      leading list bullet is preserved.
    - Pug: 'hN' headings are made linkable.
    - .txt: lines are copied unchanged.

    Parameters:
        Path: path of the source file to read.
        TempPath: value returned as the first element of the result.
        Type: default for the 'Type' meta key.
        SiteTemplate: default for the 'Template' meta key.
        SiteRoot: not referenced in this function body.
        GlobalMacros: mapping merged into the page macros when truthy;
            macros declared in the page are applied after it.
        CategoryUncategorized: category assigned when the page lists none.
        LightRun: not referenced in this function body.

    Returns:
        [TempPath, Content, Titles, Meta]
    """
    File = ReadFile(Path)
    # All extension checks below run against the lowercased path
    Path = Path.lower()
    Content, Titles, DashyTitles, HTMLTitlesFound, Macros, Meta, MetaDefault = '', [], [], False, '', '', {
        'Template': SiteTemplate,
        'Style': '',
        'Type': Type,
        'Index': 'Unspecified',
        'Feed': 'True',
        'Title': '',
        'HTMLTitle': '',
        'Description': '',
        'Image': '',
        'Macros': {},
        'Categories': [],
        'URLs': [],
        'CreatedOn': '',
        'UpdatedOn': '',
        'EditedOn': '',
        'Order': None,
        'Language': None,
        'Downsync': None}
    # Find all positions of '', add them in a list=[[pos0,pos1,line0,line1],...]
    for l in File.splitlines():
        ll = l.lstrip().rstrip()
        # Directive lines are accumulated into Meta/Macros and skipped as content
        Meta, Macros, Changed = FindPreprocLine(ll, Meta, Macros)
        if not Changed: # Find headings
            #if line in ignore block:
            #    continue
            Headings = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
            #if Path.endswith(FileExtensions['HTML']):
            #    if ll[1:].startswith(Headings):
            #        if ll[3:].startswith((" class='NoTitle", ' class="NoTitle')):
            #            Content += l + '\n'
            #        elif ll.replace(' ', ' ').startswith('// %'):
            #            pass
            #        else:
            #            Title = '#'*int(ll[2]) + ' ' + ll[4:]
            #            DashTitle = DashifyTitle(Title.lstrip('#'), DashyTitles)
            #            DashyTitles += [DashTitle]
            #            Titles += [Title]
            #            Content += MakeLinkableTitle(l, Title, DashTitle, 'pug') + '\n'
            #    else:
            #        Content += l + '\n'
            if Path.endswith(FileExtensions['HTML']) and not HTMLTitlesFound:
                # HTML is processed as a whole document rather than line by line
                Soup = BeautifulSoup(File, 'html.parser')
                Tags = Soup.find_all()
                for t in Tags:
                    if t.name in Headings:
                        # Titles are stored Markdown-style: one '#' per heading level
                        Title = '#'*int(t.name[1]) + ' ' + str(t.text)
                        DashTitle = DashifyTitle(Title.lstrip('#'), DashyTitles)
                        DashyTitles += [DashTitle]
                        Titles += [Title]
                        t.replace_with(MakeLinkableTitle(None, Title, DashTitle, 'md'))
                HTMLTitlesFound = True
                Content = ''
                TmpContent = str(Soup.prettify(formatter=None))
                for cl in TmpContent.splitlines():
                    # Only the flag is used here; the returned Meta/Macros are discarded
                    _, _, IsMetaLine = FindPreprocLine(cl, Meta, Macros)
                    if not IsMetaLine:
                        #print(cl)
                        Content += cl + '\n'
                # The whole document has been handled, so stop scanning lines.
                # NOTE(review): directives located after the first non-directive
                # line are filtered from Content but never reach Meta/Macros - confirm
                break
            elif Path.endswith(FileExtensions['Markdown']):
                lsuffix = ''
                # Remember a leading list bullet so it can be re-emitted before the title
                if ll.startswith(('-', '+', '*')):
                    lsuffix += ll[0]
                    ll = ll[1:].lstrip()
                if ll.startswith('#') or (ll.startswith('<') and ll[1:].startswith(Headings)):
                    if ll.startswith('#'):
                        Title = ll
                    elif ll.startswith('<'):
                        # Headings carrying the NoTitle class are copied as they are
                        if ll[3:].startswith((" class='NoTitle", ' class="NoTitle')):
                            Content += l + '\n'
                            continue
                        else:
                            # ll[2] is the heading level digit, ll[4:] what follows the opening tag
                            Title = '#'*int(ll[2]) + ' ' + ll[4:]
                    DashTitle = DashifyTitle(MkSoup(Title.lstrip('#')).get_text(), DashyTitles)
                    DashyTitles += [DashTitle]
                    Titles += [Title]
                    Title = MakeLinkableTitle(None, Title, DashTitle, 'md')
                    # I can't remember why I put this but it was needed
                    # NOTE(review): as written the first replace is a no-op and the
                    # second removes every space - verify these literals against
                    # version control, they look truncated
                    Title = Title.replace('> ', '> ').replace(' ', '')
                    Content += lsuffix + Title + '\n'
                else:
                    Content += l + '\n'
            elif Path.endswith('.pug'):
                if ll.startswith(Headings):
                    if ll[2:].startswith(("(class='NoTitle", '(class="NoTitle')):
                        Content += l + '\n'
                    else:
                        # ll[1] is the heading level digit, ll[3:] the text after 'hN' and one more character
                        Title = '#'*int(ll[1]) + ll[3:]
                        DashTitle = DashifyTitle(Title.lstrip('#'), DashyTitles)
                        DashyTitles += [DashTitle]
                        Titles += [Title]
                        # TODO: We should handle headers that for any reason already have parenthesis
                        if ll[2:] == '(':
                            Content += l + '\n'
                        else:
                            Content += MakeLinkableTitle(l, Title, DashTitle, 'pug') + '\n'
                else:
                    Content += l + '\n'
            elif Path.endswith('.txt'):
                Content += l + '\n'
    # Turn the accumulated directive text into a dict and fill in the defaults
    Meta = dict(ReadConf(LoadConfStr('[Meta]\n' + Meta), 'Meta'))
    for i in MetaDefault:
        if i in Meta:
            # TODO: Handle strings with spaces but wrapped in quotes
            # Categories and URLs are space-separated lists in the source
            if i == 'Categories':
                Categories = Meta['Categories'].split(' ')
                Meta['Categories'] = []
                for j in Categories:
                    Meta['Categories'] += [j]
            elif i == 'URLs':
                URLs = Meta['URLs'].split(' ')
                Meta['URLs'] = []
                for j in URLs:
                    Meta['URLs'] += [j]
        else:
            Meta.update({i:MetaDefault[i]})
    # 'UpdatedOn', when set, overrides 'EditedOn'
    if Meta['UpdatedOn']:
        Meta['EditedOn'] = Meta['UpdatedOn']
    # Resolve the unspecified/default index mode according to the page type
    if Meta['Index'] in ('Default', 'Unspecified', 'Categories'):
        if not Meta['Categories']:
            Meta['Categories'] = [CategoryUncategorized]
        if Meta['Type'] == 'Page':
            Meta['Index'] = 'Categories'
        elif Meta['Type'] == 'Post':
            Meta['Index'] = 'True'
    # Page-level macros are applied last, so they override global ones
    if GlobalMacros:
        Meta['Macros'].update(GlobalMacros)
    Meta['Macros'].update(ReadConf(LoadConfStr('[Macros]\n' + Macros), 'Macros'))
    return [TempPath, Content, Titles, Meta]
def PagePostprocessor(FileType, Text, Meta):
    """Normalise the spaced macro placeholders found in Text.

    For each macro name in Meta['Macros'], ReplWithEsc is asked to turn
    the '[: name :]' form into '[:name:]'. FileType is not used.

    Returns the processed text.
    """
    Result = Text
    for MacroName in Meta['Macros']:
        Spaced = f"[: {MacroName} :]"
        Compact = f"[:{MacroName}:]"
        Result = ReplWithEsc(Result, Spaced, Compact)
    return Result
def OrderPages(Old):
    """Return the pages of Old sorted by their 'Order' metadata.

    Each page is a sequence whose item 3 is the metadata dict. Pages
    with a truthy 'Order' come first, sorted ascending by int(Order);
    pages sharing the same order keep their relative input position
    instead of overwriting one another. Pages with a falsy 'Order'
    (None, '', ...) follow, in their input order.

    Raises ValueError if a truthy 'Order' cannot be converted to int.
    """
    Ordered, NoOrder = [], []
    for Page in Old:
        Order = Page[3]['Order']
        if Order:
            Ordered.append((int(Order), Page))
        else:
            NoOrder.append(Page)
    # list.sort is stable, so equal orders preserve input order
    Ordered.sort(key=lambda Item: Item[0])
    return [Page for _, Page in Ordered] + NoOrder
def CanIndex(Index, For):
    """Tell whether a page with the given Index setting is listed for For.

    'False' and 'None' never list the page; 'True', 'All' and 'Unlinked'
    always do; any other value lists it only when it equals For.
    """
    NeverListed = ('False', 'None')
    AlwaysListed = ('True', 'All', 'Unlinked')
    if Index in NeverListed:
        return False
    if Index in AlwaysListed:
        return True
    return Index == For
def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteDomain, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale, LightRun):
HTMLTitles = FormatTitles(Titles)
BodyDescription, BodyImage = '', ''
if not File.lower().endswith('.txt'):
Soup = BeautifulSoup(Content, 'html.parser')
if not BodyDescription and Soup.p:
BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
if not BodyImage and Soup.img and Soup.img['src']:
BodyImage = Soup.img['src']
#Content = SquareFnrefs(Content)
if '', '[', ']')
if any(_ in Content for _ in ('', ' noprocess --->', '', '')):
Content = DictReplWithEsc(
Content, {
'': '',
' noprocess --->': '',
'': '',
'': ''})
Title = GetTitle(File.split('/')[-1], Meta, Titles, 'MetaTitle', BlogName)
Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
Image = GetImage(Meta, BodyImage, 'MetaImage')
ContentHeader = MakeContentHeader(Meta, Locale, MakeCategoryLine(File, Meta))
TimeNow = datetime.now().strftime('%Y-%m-%d %H:%M')
RelativeRoot = GetPathLevels(PagePath)
if 'staticoso:DynamicPart:' in HTML: # Reduce risk of unnecessary cycles
for Line in HTML.splitlines():
Line = Line.lstrip().rstrip()
if (Line.startswith('[staticoso:DynamicPart:') and Line.endswith(']')) or (Line.startswith('' + html.escape(Content) + '
'
elif FileLower.endswith(FileExtensions['HTML']):
Content = ReadFile(PagePath)
if LightRun:
HTMLPagesList = None
else:
TemplateMeta = TemplatePreprocessor(TemplatesText[Meta['Template']])
HTMLPagesList = GetHTMLPagesList(
Pages=Pages,
BlogName=BlogName,
SiteRoot=SiteRoot,
PathPrefix=GetPathLevels(File),
Unite=ConfMenu,
Type='Page',
For='Menu',
MarkdownExts=MarkdownExts,
MenuStyle=TemplateMeta['MenuStyle'])
HTML, ContentHTML, Description, Image = PatchHTML(
File=File,
HTML=TemplatesText[Meta['Template']],
StaticPartsText=StaticPartsText,
DynamicParts=DynamicParts,
DynamicPartsText=DynamicPartsText,
HTMLPagesList=HTMLPagesList,
PagePath=PagePath[len(f"{OutDir}/"):],
Content=Content,
Titles=Titles,
Meta=Meta,
SiteDomain=SiteDomain,
SiteRoot=SiteRoot,
SiteName=SiteName,
BlogName=BlogName,
FolderRoots=FolderRoots,
Categories=Categories,
SiteLang=SiteLang,
Locale=Locale,
LightRun=LightRun)
HTML = ReplWithEsc(HTML, f"