staticoso/Source/Modules/Site.py

""" ================================= |
| This file is part of                |
|   staticoso                         |
| Just a simple Static Site Generator |
|                                     |
| Licensed under the AGPLv3 license   |
|   Copyright (C) 2022, OctoSpacc     |
| ================================= """

from datetime import datetime
from Libs.bs4 import BeautifulSoup
from Modules.Config import *
from Modules.HTML import *
from Modules.Markdown import *
from Modules.Pug import *
from Modules.Utils import *

# HTML for a linkable section heading; filled through str.format with
# Index (heading level), DashTitle (anchor id) and Title (heading text).
HTMLSectionTitleLine = '<h{Index} class="SectionHeading"><span class="SectionLink"><a href="#{DashTitle}"><span>»</span></a> </span><span class="SectionTitle" id="{DashTitle}">{Title}</span></h{Index}>'
#PugSectionTitleLine = "{Line[:Index]}{Line[Index:Index+2]}.SectionHeading #[span.SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle {Line[Index+2:]}]"
# Source text of an automatically created category page.
# The only str.format placeholder in it is {Name}, so it must be filled with Name=...
# NOTE(review): PagePreprocessor in this file only collects '// % ...' lines as
# metadata; the '// Key: value' lines below carry no '%' - confirm they are still read.
CategoryPageTemplate = """\
// Title: {Name}
// Type: Page
// Index: True

# {Name}

<div>[staticoso:Category:{Name}]</div>
"""

def DashifyTitle(Title, Done=None):
	"""Turn a heading text into an anchor id.

	Title: heading text; surrounding spaces are removed before conversion.
	Done: list of ids already in use, handed to UndupeStr together with '-'
		(presumably so the result can be made unique - verify against UndupeStr).
		When omitted, a new empty list is used for this call.
	Returns the value produced by UndupeStr(DashifyStr(...), Done, '-').
	"""
	# A fresh list per call: a literal [] default would be one object shared by all calls
	if Done is None:
		Done = []
	return UndupeStr(DashifyStr(Title.strip(' ')), Done, '-')

def MakeLinkableTitle(Line, Title, DashTitle, Type):
	"""Return the source of a heading that carries an anchor and a link to itself.

	Line: the original source line; only read for Type 'pug'.
	Title: the heading as '#'-prefixed text; only read for Type 'md'.
	DashTitle: the anchor id to attach to the heading.
	Type: 'md' gives an HTML heading, 'pug' gives a Pug line; any other value gives None.
	"""
	if Type == 'pug':
		# Split the line at its first 'h': indentation, the two-character tag, the rest
		Start = Line.find('h')
		Indent, Tag, Rest = Line[:Start], Line[Start:Start+2], Line[Start+2:]
		return f"{Indent}{Tag}.SectionHeading #[span.SectionLink #[a(href='#{DashTitle}') #[span »]] ]#[span#{DashTitle}.SectionTitle {Rest}]"
	if Type == 'md':
		# The heading level is the number of '#' in the first space-separated word
		Level = Title.split(' ')[0].count('#')
		# Skip the '#' marks and the one character that follows them
		Text = Title[Level+1:]
		return HTMLSectionTitleLine.format(Index=Level, DashTitle=DashTitle, Title=Text)
	return None

def GetTitle(FileName, Meta, Titles, Prefer='MetaTitle', BlogName=None):
	"""Choose the title of a page.

	FileName: last fallback when no other title is available.
	Meta: page metadata; 'Title' and 'Categories' are read, plus 'HTMLTitle'
		when Prefer is 'HTMLTitle'.
	Titles: headings found in the page body; the first one, without its
		leading '#' marks, is the body title.
	Prefer: 'BodyTitle', 'MetaTitle' or 'HTMLTitle', the source tried first.
	BlogName: when set and the page is in the 'Blog' category, ' - ' and this
		name are appended to the title.
	"""
	def BodyOrFileName():
		return Titles[0].lstrip('#') if Titles else FileName

	if Prefer == 'BodyTitle':
		if Titles:
			Title = Titles[0].lstrip('#')
		else:
			Title = Meta['Title'] or FileName
	elif Prefer == 'MetaTitle':
		Title = Meta['Title'] or BodyOrFileName()
	elif Prefer == 'HTMLTitle':
		Title = Meta['HTMLTitle'] or Meta['Title'] or BodyOrFileName()
	if not BlogName or 'Blog' not in Meta['Categories']:
		return Title
	return Title + (' - ' + BlogName)

def GetDescription(Meta, BodyDescription, Prefer='MetaDescription'):
	"""Choose the description of a page.

	Meta: page metadata; 'Description' is read.
	BodyDescription: description taken from the page body.
	Prefer: 'MetaDescription' or 'BodyDescription', the source tried first;
		the other one is the fallback, and '' is returned when both are empty.
	"""
	if Prefer == 'MetaDescription':
		Description = Meta['Description'] or BodyDescription or ''
	elif Prefer == 'BodyDescription':
		Description = BodyDescription or Meta['Description'] or ''
	return Description

def GetImage(Meta, BodyImage, Prefer='MetaImage'):
	"""Choose the image of a page.

	Meta: page metadata; 'Image' is read.
	BodyImage: image taken from the page body.
	Prefer: 'MetaImage' or 'BodyImage', the source tried first; the other one
		is the fallback, and '' is returned when both are empty.
	"""
	if Prefer == 'MetaImage':
		Image = Meta['Image'] or BodyImage or ''
	elif Prefer == 'BodyImage':
		Image = BodyImage or Meta['Image'] or ''
	return Image

def MakeContentHeader(Meta, Locale, Categories=''):
	"""Build the info header of a page and pass it through markdown().

	Meta: page metadata; 'CreatedOn' and 'EditedOn' each add a line when set.
	Locale: labels, looked up by 'CreatedOn', 'EditedOn' and 'Categories'.
	Categories: ready-made category text; adds a last line when not empty.
	"""
	# Every line ends with two spaces before the newline
	Lines = [f"{Locale[Key]}: {Meta[Key]}  \n" for Key in ('CreatedOn', 'EditedOn') if Meta[Key]]
	if Categories:
		Lines.append(f"{Locale['Categories']}: {Categories}  \n")
	return markdown(''.join(Lines).rstrip())

def MakeCategoryLine(File, Meta):
	"""Return Markdown links to the category pages of a page.

	File: path of the page, given to GetPathLevels to prefix each link.
	Meta: page metadata; 'Categories' is read.
	Returns '' when the page has no categories, otherwise one
	' [name](...Categories/name.html) ' item per category, concatenated.
	"""
	if not Meta['Categories']:
		return ''
	return ''.join(
		f" [{Cat}]({GetPathLevels(File)}Categories/{Cat}.html) "
		for Cat in Meta['Categories'])

def GetHTMLPagesList(Pages, BlogName, SiteRoot, PathPrefix, Unite=(), Type='Page', Category=None, For='Menu', MarkdownExts=(), MenuStyle='Default'):
	"""Build the list of pages of one Type as Markdown text and render it with markdown().

	Pages: list of [File, Content, Titles, Meta] entries; it is not modified.
	BlogName, SiteRoot, PathPrefix: passed through to MakeListTitle for each entry.
	Unite: extra entries; each truthy item is inserted at its own index and its
		text is used as the list item as it is.
	Type: only entries whose Meta['Type'] equals this are listed.
	Category: when set, only entries having it in Meta['Categories'] are listed.
	For: name of the list being built, checked against Meta['Index'] by CanIndex.
	MarkdownExts: given to MarkdownHTMLEscape and to markdown() as extensions.
	MenuStyle: 'Flat' puts every item at list depth 1; 'Line' hides folder names
		and emits <span> items without line breaks; other values give a nested list.
	Returns the result of markdown() for the assembled list text.
	"""
	ShowPaths, Flatten, SingleLine = True, False, False
	if MenuStyle == 'Flat':
		Flatten = True
	elif MenuStyle == 'Line':
		ShowPaths, SingleLine = False, True
	List, LastParent = '', []
	# Filter into a new list: removing items from a list while looping over it
	# skips the item that follows each removal, which let unindexed pages through.
	IndexPages = [
		e for e in Pages
		if e[3]['Index'] not in ('False', 'None') and e[3]['Type'] == Type]
	if Type == 'Page':
		IndexPages = OrderPages(IndexPages)
	for i,e in enumerate(Unite):
		if e:
			IndexPages.insert(i,[e,None,None,{'Type':Type,'Index':'True','Order':'Unite'}])
	for File, Content, Titles, Meta in IndexPages:
		if Meta['Type'] == Type and CanIndex(Meta['Index'], For) and (not Category or Category in Meta['Categories']):
			Depth = (File.count('/') + 1) if Meta['Order'] != 'Unite' else 1
			if Depth > 1 and Meta['Order'] != 'Unite': # Folder names are handled here
				CurParent = File.split('/')[:-1]
				for i,s in enumerate(CurParent):
					# LastParent is updated right below, so a folder line is added
					# only when the parent folder differs from the previous entry's
					if LastParent != CurParent and ShowPaths:
						LastParent = CurParent
						Levels = '- ' * ((Depth-1+i) if not Flatten else 1)
						# Folders with or without an index file
						if StripExt(File).endswith('index'):
							Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, BlogName, PathPrefix)
						else:
							Title = CurParent[Depth-2+i]
						if SingleLine:
							List += ' <span>' + Title + '</span> '
						else:
							List += Levels + Title + '\n'
			# An index file inside a folder was already listed as the folder itself
			if not (Depth > 1 and StripExt(File).split('/')[-1] == 'index'):
				Levels = '- ' * (Depth if not Flatten else 1)
				if Meta['Order'] == 'Unite':
					Title = File
				else:
					Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, BlogName, PathPrefix)
				if SingleLine:
					List += ' <span>' + Title + '</span> '
				else:
					List += Levels + Title + '\n'
	return markdown(MarkdownHTMLEscape(List, MarkdownExts), extensions=MarkdownExts)

def TemplatePreprocessor(Text):
	"""Read the metadata written inside a template as '<!-- % ... -->' lines.

	Text: the template source.
	Returns a dict with the values parsed by ReadConf from the collected lines;
	'MenuStyle' is set to 'Default' when the template does not define it.
	"""
	Defaults = {
		'MenuStyle': 'Default'}
	Collected = ''
	for Raw in Text.splitlines():
		Stripped = Raw.lstrip()
		if not Stripped.startswith('<!--'):
			continue
		Inner = Stripped[4:].strip()
		# Only single-line comments of the form <!-- % ... --> count
		if Inner.startswith('%') and Inner.endswith('-->'):
			Collected += Inner[1:-3].strip() + '\n'
	Parsed = dict(ReadConf(LoadConfStr('[Meta]\n' + Collected), 'Meta'))
	for Key, Value in Defaults.items():
		Parsed.setdefault(Key, Value)
	return Parsed

def PagePreprocessor(Path, Type, SiteTemplate, SiteRoot, GlobalMacros, CategoryUncategorized, LightRun=False):
	"""Read a source page and split it into content, headings and metadata.

	Lines starting with '//' are not copied to the content: '// % ...' lines are
	collected as metadata and '// $ ...' lines as macros. In the other lines the
	headings are recorded and, for Markdown, Pug and HTML sources, rewritten by
	MakeLinkableTitle. HTML sources are parsed as a whole file, once.

	Path: path of the source file; its lowercased ending selects the handling.
	Type: default for Meta['Type'].
	SiteTemplate: default for Meta['Template'].
	SiteRoot, LightRun: accepted but not used in this function.
	GlobalMacros: mapping merged into Meta['Macros'] before the page's own macros,
		so a page macro of the same name wins.
	CategoryUncategorized: the category given to a page without categories
		whose Index is 'Default' or 'Unspecified'.
	Returns (Content, Titles, Meta).
	"""
	File = ReadFile(Path)
	Path = Path.lower()
	Headings = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
	Content, Titles, DashyTitles, HTMLTitlesFound, Macros, Meta, MetaDefault = '', [], [], False, '', '', {
		'Template': SiteTemplate,
		'Style': '',
		'Type': Type,
		'Index': 'Unspecified',
		'Feed': 'True',
		'Title': '',
		'HTMLTitle': '',
		'Description': '',
		'Image': '',
		'Macros': {},
		'Categories': [],
		'CreatedOn': '',
		'EditedOn': '',
		'Order': None}
	for l in File.splitlines():
		ll = l.lstrip()
		if ll.startswith('//'):
			lll = ll[2:].lstrip()
			if lll.startswith('%'):
				Meta += lll[1:].lstrip() + '\n'
			elif lll.startswith('$'):
				Macros += lll[1:].lstrip() + '\n'
		else:
			if Path.endswith(FileExtensions['HTML']) and not HTMLTitlesFound:
				# The whole file is parsed here, on the first content line only
				Soup = BeautifulSoup(File, 'html.parser')
				Tags = Soup.find_all()
				for t in Tags:
					if t.name in Headings:
						Title = '#'*int(t.name[1]) + ' ' + str(t.text)
						DashTitle = DashifyTitle(Title.lstrip('#'), DashyTitles)
						DashyTitles += [DashTitle]
						Titles += [Title]
						t.replace_with(MakeLinkableTitle(None, Title, DashTitle, 'md'))
				Content = str(Soup.prettify(formatter=None))
				HTMLTitlesFound = True
			elif Path.endswith(FileExtensions['Markdown']):
				if ll.startswith('#') or (ll.startswith('<') and ll[1:].startswith(Headings)):
					if ll.startswith('#'):
						Title = ll
					elif ll.startswith('<'):
						# The line starts with '<hN', so ll[2] is the heading level digit
						Title = '#'*int(ll[2]) + str(ll[3:])
					DashTitle = DashifyTitle(MkSoup(Title.lstrip('#')).get_text(), DashyTitles)
					DashyTitles += [DashTitle]
					Titles += [Title]
					Title = MakeLinkableTitle(None, Title, DashTitle, 'md')
					Title = Title.replace('> </', '>  </')
					Title = Title.replace(' </', '</')
					Content += Title + '\n'
				else:
					Content += l + '\n'
			elif Path.endswith('.pug'):
				if ll.startswith(Headings):
					# Headings marked with the NoTitle class are copied unchanged and not recorded
					if ll[2:].startswith(("(class='NoTitle", '(class="NoTitle')):
						Content += l + '\n'
					else:
						Title = '#'*int(ll[1]) + str(ll[3:])
						DashTitle = DashifyTitle(Title.lstrip('#'), DashyTitles)
						DashyTitles += [DashTitle]
						Titles += [Title]
						# TODO: We should handle headers that for any reason already have parenthesis
						# NOTE(review): this only matches a line that is exactly 'hN(' - confirm ll[2] was not intended
						if ll[2:] == '(':
							Content += l + '\n'
						else:
							Content += MakeLinkableTitle(l, Title, DashTitle, 'pug') + '\n'
				else:
					Content += l + '\n'
			elif Path.endswith('.txt'):
				Content += l + '\n'
	Meta = dict(ReadConf(LoadConfStr('[Meta]\n' + Meta), 'Meta'))
	for i in MetaDefault:
		if i in Meta:
			if i == 'Categories':
				# Categories are written as one space-separated string
				Meta['Categories'] = Meta['Categories'].split(' ')
		else:
			Meta[i] = MetaDefault[i]
	if Meta['Index'] in ('Default', 'Unspecified'):
		if not Meta['Categories']:
			Meta['Categories'] = [CategoryUncategorized]
		# Without an explicit Index, pages are left out of lists and posts are included
		if Meta['Type'] == 'Page':
			Meta['Index'] = 'False'
		elif Meta['Type'] == 'Post':
			Meta['Index'] = 'True'
	if GlobalMacros:
		Meta['Macros'].update(GlobalMacros)
	Meta['Macros'].update(ReadConf(LoadConfStr('[Macros]\n' + Macros), 'Macros'))
	return Content, Titles, Meta

def PagePostprocessor(FileType, Text, Meta):
	"""Rewrite the spaced macro form '[: name :]' to '[:name:]' for every macro of the page.

	FileType: accepted but not used in this function.
	Text: the page text to process.
	Meta: page metadata; the names in Meta['Macros'] are the macros handled.
	Returns the processed text.
	"""
	for Name in Meta['Macros']:
		Spaced, Tight = f"[: {Name} :]", f"[:{Name}:]"
		Text = ReplWithEsc(Text, Spaced, Tight)
	return Text

def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, BlogName, PathPrefix=''):
	"""Return the Markdown text of one entry of a pages list.

	File: path of the page; its last component is the title fallback and its
		path without extension, plus '.html', is the link target.
	Meta, Titles, Prefer, BlogName: given to GetTitle to pick the title.
	SiteRoot: accepted but not used in this function.
	PathPrefix: text put in front of the link target.
	The title is a link unless Meta['Index'] is 'Unlinked'; posts are prefixed
	with their creation date in square brackets, or '[?]' when it is not set.
	"""
	Title = GetTitle(File.split('/')[-1], Meta, Titles, Prefer, BlogName)
	if Meta['Index'] != 'Unlinked':
		Title = f"[{Title}]({PathPrefix}{StripExt(File)}.html)"
	if Meta['Type'] == 'Post':
		Title = f"[{Meta['CreatedOn'] or '?'}] {Title}"
	return Title

def FormatTitles(Titles, Flatten=False):
	"""Build the chapter list of a page and pass it through markdown().

	Titles: headings as '#'-prefixed strings.
	Flatten: when true every item is put at list depth 1, otherwise the depth
		is the number of '#' in the first space-separated word of the heading.
	Each item links to the anchor id computed by DashifyTitle from the heading text.
	"""
	# TODO: Somehow titles written in Pug can end up here and don't work, they should be handled
	Lines, Seen = [], []
	for Raw in Titles:
		Level = 1 if Flatten else Raw.split(' ')[0].count('#')
		Text = MkSoup(Raw.lstrip('#')).get_text()
		# Ids already given out are passed along with every new heading
		Anchor = DashifyTitle(Text, Seen)
		Seen.append(Anchor)
		Lines.append(f"{'- ' * Level}[{Text}](#{Anchor})\n")
	return markdown(''.join(Lines))

def OrderPages(Old):
	"""Sort pages by their Meta['Order'] value.

	Old: list of page entries whose item 3 is the metadata dict.
	Returns a new list: entries with a truthy 'Order' first, in ascending
	int(Order), followed by the entries without one in their input order.
	Entries sharing the same Order are all kept, in their input order.
	"""
	Ordered, NoOrder = [], []
	for e in Old:
		(Ordered if e[3]['Order'] else NoOrder).append(e)
	# list.sort is stable, so equal orders keep their input order; placing each
	# entry in a slot indexed by its Order made equal orders overwrite each other.
	Ordered.sort(key=lambda e: int(e[3]['Order']))
	return Ordered + NoOrder

def CanIndex(Index, For):
	"""Tell whether a page with the given Index value belongs in the list named For.

	'False' and 'None' never do; 'True', 'All' and 'Unlinked' always do;
	any other value does only when it equals For.
	"""
	if Index in ('True', 'All', 'Unlinked'):
		return True
	if Index in ('False', 'None'):
		return False
	return Index == For

def _ApplyPageTokens(Text, Meta, FolderRoots, Categories):
	"""Replace the macro, folder root and category tokens in Text and return it."""
	for e in Meta['Macros']:
		Text = ReplWithEsc(Text, f"[:{e}:]", Meta['Macros'][e])
	for e in FolderRoots:
		Text = ReplWithEsc(Text, f"[staticoso:Folder:{e}:AbsoluteRoot]", FolderRoots[e])
	for e in Categories:
		# The <span>-wrapped token is handled first, so the wrapper goes away with it
		Text = ReplWithEsc(Text, f"<span>[staticoso:Category:{e}]</span>", Categories[e])
		Text = ReplWithEsc(Text, f"[staticoso:Category:{e}]", Categories[e])
	return Text

def PatchHTML(File, HTML, StaticPartsText, DynamicParts, DynamicPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale, LightRun):
	"""Fill a template and the page content with the values of one page.

	File: source path of the page; a '.txt' ending skips the HTML inspection.
	HTML: the template text.
	StaticPartsText: mapping of static part names to their text.
	DynamicParts: mapping of section names to one part name or a list of them.
	DynamicPartsText: mapping of 'path/part' keys to the text of each part.
	HTMLPagesList: text put in place of '[staticoso:Site:Menu]'.
	PagePath: output path of the page, also given to GetPathLevels.
	Content: the page content, already converted for output by the caller.
	Titles, Meta: headings and metadata of the page.
	SiteRoot, SiteName, BlogName, SiteLang: site values used in the tokens and the title.
	FolderRoots, Categories: mappings used for the folder and category tokens.
	Locale: labels given to MakeContentHeader.
	LightRun: when true the template is not processed and the first returned item is None.
	Returns (HTML, ContentHTML, Description, Image).
	"""
	BodyDescription, BodyImage = '', ''
	if not File.lower().endswith('.txt'):
		Soup = BeautifulSoup(Content, 'html.parser')

		if Soup.p:
			# First 150 characters of the first paragraph, always followed by '...'
			BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
		# .get() instead of ['src']: an <img> without a src attribute must not raise KeyError
		if Soup.img and Soup.img.get('src'):
			BodyImage = Soup.img.get('src')

		#Content = SquareFnrefs(Content)
		Content = AddToTagStartEnd(Content, '<a class="footnote-ref"', '</a>', '[', ']')

	Title = GetTitle(File.split('/')[-1], Meta, Titles, 'MetaTitle', BlogName)
	Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
	Image = GetImage(Meta, BodyImage, 'MetaImage')

	if LightRun:
		# The full page is not produced on a light run, so the template is left alone
		HTML = None
	else:
		for Line in HTML.splitlines():
			Line = Line.strip()
			if Line.startswith('[staticoso:DynamicPart:') and Line.endswith(']'):
				PartPath = Line[len('[staticoso:DynamicPart:'):-1]
				Section = PartPath.split('/')[-1]
				Text = ''
				if Section in DynamicParts:
					Part = DynamicParts[Section]
					if isinstance(Part, list):
						for e in Part:
							Text += DynamicPartsText[f"{PartPath}/{e}"] + '\n'
					elif isinstance(Part, str):
						Text = DynamicPartsText[f"{PartPath}/{Part}"]
				HTML = ReplWithEsc(HTML, f"[staticoso:DynamicPart:{PartPath}]", Text)

		for e in StaticPartsText:
			HTML = ReplWithEsc(HTML, f"[staticoso:StaticPart:{e}]", StaticPartsText[e])

		HTML = DictReplWithEsc(
			HTML, {
				'[staticoso:Site:Menu]': HTMLPagesList,
				'[staticoso:Page:Lang]': SiteLang,
				'[staticoso:Page:Chapters]': FormatTitles(Titles),
				'[staticoso:Page:Title]': Title,
				'[staticoso:Page:Description]': Description,
				'[staticoso:Page:Image]': Image,
				'[staticoso:Page:Path]': PagePath,
				'[staticoso:Page:Style]': Meta['Style'],
				'[staticoso:Page:Content]': Content,
				'[staticoso:Page:ContentInfo]': MakeContentHeader(Meta, Locale, MakeCategoryLine(File, Meta)),
				'[staticoso:BuildTime]': datetime.now().strftime('%Y-%m-%d %H:%M'),
				'[staticoso:Site:Name]': SiteName,
				'[staticoso:Site:AbsoluteRoot]': SiteRoot,
				'[staticoso:Site:RelativeRoot]': GetPathLevels(PagePath)
			})
		HTML = _ApplyPageTokens(HTML, Meta, FolderRoots, Categories)

	# The bare content gets a smaller set of tokens replaced, on every run
	ContentHTML = DictReplWithEsc(
		Content, {
			'[staticoso:Page:Title]': Title,
			'[staticoso:Page:Description]': Description,
			'[staticoso:Site:Name]': SiteName,
			'[staticoso:Site:AbsoluteRoot]': SiteRoot,
			'[staticoso:Site:RelativeRoot]': GetPathLevels(PagePath)
		})
	ContentHTML = _ApplyPageTokens(ContentHTML, Meta, FolderRoots, Categories)

	return HTML, ContentHTML, Description, Image

def MakeSite(OutputDir, LimitFiles, TemplatesText, StaticPartsText, DynamicParts, DynamicPartsText, ConfMenu, GlobalMacros, SiteName, BlogName, SiteTagline, SiteTemplate, SiteDomain, SiteRoot, FolderRoots, SiteLang, Locale, Minify, MinifyKeepComments, NoScripts, ImgAltToTitle, ImgTitleToAlt, Sorting, MarkdownExts, AutoCategories, CategoryUncategorized):
	"""Build every page found under 'Pages' and 'Posts' and write it into OutputDir.

	The steps are: collect and sort the source files, preprocess them, compile
	Pug sources, build the category lists, optionally create the missing
	category pages, then patch each page into its template and write it.

	LimitFiles: False to build everything, otherwise a collection of paths;
		pages outside it get a light run, where no full page is built or written.
	ConfMenu: menu entries; an entry equal to the '.html' path of a page is set
		to None in place, the rest is passed to GetHTMLPagesList as Unite.
	Sorting: mapping with 'Pages' and 'Posts'; 'Inverse' reverses that list.
	AutoCategories: when true, a page is created for each category that has no
		file under OutputDir/Categories.
	SiteTagline, SiteDomain: accepted but not used in this function.
	The other parameters are passed to PagePreprocessor, GetHTMLPagesList,
	PatchHTML and the HTML post-processing helpers.
	Returns a list of [File, Content, Titles, Meta, ContentHTML, SlimHTML,
	Description, Image] entries, one per page.
	"""
	PagesPaths, PostsPaths, Pages, MadePages, Categories = [], [], [], [], {}
	for Ext in FileExtensions['Pages']:
		for File in Path('Pages').rglob(f"*.{Ext}"):
			PagesPaths += [FileToStr(File, 'Pages/')]
		for File in Path('Posts').rglob(f"*.{Ext}"):
			PostsPaths += [FileToStr(File, 'Posts/')]

	PagesPaths = FileNameDateSort(PagesPaths)
	if Sorting['Pages'] == 'Inverse':
		PagesPaths.reverse()
	PostsPaths = FileNameDateSort(PostsPaths)
	if Sorting['Posts'] == 'Inverse':
		PostsPaths.reverse()

	print("[I] Preprocessing Source Pages")
	for Type in ['Page', 'Post']:
		if Type == 'Page':
			Files = PagesPaths
			PathPrefix = ''
		elif Type == 'Post':
			Files = PostsPaths
			PathPrefix = 'Posts/'
		for File in Files:
			TempPath = f"{PathPrefix}{File}"
			LightRun = False if LimitFiles == False or TempPath in LimitFiles else True
			Content, Titles, Meta = PagePreprocessor(f"{Type}s/{File}", Type, SiteTemplate, SiteRoot, GlobalMacros, CategoryUncategorized, LightRun=LightRun)
			Pages += [[TempPath, Content, Titles, Meta]]
			for Cat in Meta['Categories']:
				Categories.update({Cat:''})
	PugCompileList(OutputDir, Pages, LimitFiles)

	if Categories:
		print("[I] Generating Category Lists")
		for Cat in Categories:
			# Each category list is the pages list followed by the posts list
			for Type in ('Page', 'Post'):
				Categories[Cat] += GetHTMLPagesList(
					Pages=Pages,
					BlogName=BlogName,
					SiteRoot=SiteRoot,
					PathPrefix=GetPathLevels('Categories/'),
					Type=Type,
					Category=Cat,
					For='Categories',
					MarkdownExts=MarkdownExts,
					MenuStyle='Flat')

	if AutoCategories:
		Dir = f"{OutputDir}/Categories"
		for Cat in Categories:
			Exists = False
			for File in Path(Dir).rglob(str(Cat)+'.*'):
				Exists = True
				break
			if not Exists:
				File = f"Categories/{Cat}.md"
				FilePath = f"{OutputDir}/{File}"
				# The template's placeholder is {Name}; any other keyword raises KeyError
				WriteFile(FilePath, CategoryPageTemplate.format(Name=Cat))
				# Computed for this file, not left over from the preprocessing loop
				LightRun = False if LimitFiles == False or File in LimitFiles else True
				Content, Titles, Meta = PagePreprocessor(FilePath, 'Page', SiteTemplate, SiteRoot, GlobalMacros, CategoryUncategorized, LightRun=LightRun)
				Pages += [[File, Content, Titles, Meta]]

	for i,e in enumerate(ConfMenu):
		for File, Content, Titles, Meta in Pages:
			File = StripExt(File)+'.html'
			if e == File:
				ConfMenu[i] = None

	print("[I] Writing Pages")
	for File, Content, Titles, Meta in Pages:
		LightRun = False if LimitFiles == False or File in LimitFiles else True

		PagePath = f"{OutputDir}/{StripExt(File)}.html"
		if File.lower().endswith(FileExtensions['Markdown']):
			Content = markdown(PagePostprocessor('md', Content, Meta), extensions=MarkdownExts)
		elif File.lower().endswith(('.pug')):
			# Pug and HTML content is read back from the file already present at the output path
			Content = PagePostprocessor('pug', ReadFile(PagePath), Meta)
		elif File.lower().endswith(('.txt')):
			Content = '<pre>' + html.escape(Content) + '</pre>'
		elif File.lower().endswith(FileExtensions['HTML']):
			Content = ReadFile(PagePath)

		if LightRun:
			HTMLPagesList = None
		else:
			TemplateMeta = TemplatePreprocessor(TemplatesText[Meta['Template']])
			HTMLPagesList = GetHTMLPagesList(
				Pages=Pages,
				BlogName=BlogName,
				SiteRoot=SiteRoot,
				PathPrefix=GetPathLevels(File),
				Unite=ConfMenu,
				Type='Page',
				For='Menu',
				MarkdownExts=MarkdownExts,
				MenuStyle=TemplateMeta['MenuStyle'])

		HTML, ContentHTML, Description, Image = PatchHTML(
			File=File,
			HTML=TemplatesText[Meta['Template']],
			StaticPartsText=StaticPartsText,
			DynamicParts=DynamicParts,
			DynamicPartsText=DynamicPartsText,
			HTMLPagesList=HTMLPagesList,
			PagePath=PagePath[len(f"{OutputDir}/"):],
			Content=Content,
			Titles=Titles,
			Meta=Meta,
			SiteRoot=SiteRoot,
			SiteName=SiteName,
			BlogName=BlogName,
			FolderRoots=FolderRoots,
			Categories=Categories,
			SiteLang=SiteLang,
			Locale=Locale,
			LightRun=LightRun)

		# HTML is None on a light run, so only ContentHTML is post-processed then
		if Minify:
			if not LightRun:
				HTML = DoMinifyHTML(HTML, MinifyKeepComments)
			ContentHTML = DoMinifyHTML(ContentHTML, MinifyKeepComments)
		if NoScripts:
			if not LightRun:
				HTML = StripTags(HTML, ['script'])
			ContentHTML = StripTags(ContentHTML, ['script'])
		if ImgAltToTitle or ImgTitleToAlt:
			if not LightRun:
				HTML = WriteImgAltAndTitle(HTML, ImgAltToTitle, ImgTitleToAlt)
			ContentHTML = WriteImgAltAndTitle(ContentHTML, ImgAltToTitle, ImgTitleToAlt)

		if LightRun:
			SlimHTML = None
		else:
			SlimHTML = HTMLPagesList + ContentHTML

		if not LightRun:
			WriteFile(PagePath, HTML)

		MadePages += [[File, Content, Titles, Meta, ContentHTML, SlimHTML, Description, Image]]

	return MadePages