diff --git a/README.md b/README.md index cddd7e9..d44793f 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,11 @@ Feel free to experiment with all of this stuff! - (Included) [htmlmin == 0.1.12](https://pypi.org/project/htmlmin) - [node == 12.22.5](https://nodejs.org) - [npm == 7.5.2](https://www.npmjs.com) - (Included) [pug-cli == 1.0.0-alpha6](https://npmjs.com/package/pug-cli) +- [Go](https://go.dev) +- [html2gmi](https://github.com/LukeEmmet/html2gmi) ## Features roadmap +- [ ] Polished Gemtext generation - [x] Autodetection of pages and posts - [x] Info for posts shown on their page - [x] HTML minification diff --git a/Source/Build.py b/Source/Build.py index 93414a3..b4cf9f0 100755 --- a/Source/Build.py +++ b/Source/Build.py @@ -13,13 +13,19 @@ import os import shutil from ast import literal_eval from datetime import datetime +from pathlib import Path + +# Our local Markdown patches conflict if the module is installed on the system, so first try to import from system +try: + from markdown import markdown +except ModuleNotFoundError: + from Libs.markdown import markdown + from Libs import htmlmin from Libs.bs4 import BeautifulSoup -from Libs.markdown import Markdown -from Libs.markdown import markdown -from pathlib import Path from Modules.Feed import * from Modules.Gemini import * +from Modules.Pug import * from Modules.Utils import * Extensions = { @@ -101,7 +107,11 @@ def FormatTitles(Titles): DashyTitles += [DashyTitle] Title = '[{}](#{})'.format(Title, DashyTitle) MDTitles += Heading + Title + '\n' - return Markdown().convert(MDTitles) + return markdown(MDTitles) + +# https://stackoverflow.com/a/15664273 +def IgnoreFiles(Dir, Files): + return [f for f in Files if os.path.isfile(os.path.join(Dir, f))] def LoadFromDir(Dir, Rglob): Contents = {} @@ -169,16 +179,6 @@ def PreProcessor(Path, SiteRoot): Content += l + '\n' return Content, Titles, Meta -def PugCompileList(Pages): - # Pug-cli seems to shit itself with folder paths as input, so we pass ALL the files as arguments - Paths = '' - for File, Content, Titles, Meta in Pages: - if File.endswith('.pug'): - Path = 'public/{}'.format(File) - WriteFile(Path, Content) - Paths += '"{}" '.format(Path) - os.system('pug -P {} > /dev/null'.format(Paths)) - def MakeContentHeader(Meta, Locale, Categories=''): Header = '' if Meta['Type'] == 'Post': @@ -187,7 +187,7 @@ def MakeContentHeader(Meta, Locale, Categories=''): Header += '{} {} \n'.format(Locale[i], Meta[i]) if Categories: Header += '{}: {} \n'.format(Locale['Categories'], Categories) - return Markdown().convert(Header) + return markdown(Header) def MakeCategoryLine(Meta, Reserved): Categories = '' @@ -243,10 +243,13 @@ def PatchHTML(Base, PartsText, ContextParts, ContextPartsText, HTMLPagesList, Pa for i in Categories: Base = Base.replace('[HTML:Category:{}]'.format(i), Categories[i]) + # TODO: Clean this doubling? Content = Content.replace('[HTML:Site:AbsoluteRoot]', SiteRoot) Content = Content.replace('[HTML:Site:RelativeRoot]', GetLevels(PagePath)) for i in FolderRoots: Content = Content.replace('[HTML:Folder:{}:AbsoluteRoot]'.format(i), FolderRoots[i]) + for i in Categories: + Content = Content.replace('[HTML:Category:{}]'.format(i), Categories[i]) return Base, Content, Description, Image @@ -311,12 +314,14 @@ def GetHTMLPagesList(Pages, SiteRoot, PathPrefix, Type='Page', Category=None, Fo Levels = '- ' * n Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, PathPrefix) List += Levels + Title + '\n' - return Markdown().convert(List) + return markdown(List) def DelTmp(): for Ext in Extensions['Pages']: for File in Path('public').rglob('*.{}'.format(Ext)): os.remove(File) + for File in Path('public').rglob('*.tmp'): + os.remove(File) def RevSort(List): List.sort() @@ -394,7 +399,7 @@ def MakeSite(TemplatesText, PartsText, ContextParts, ContextPartsText, SiteName, For='Menu') PagePath = 'public/{}.html'.format(StripExt(File)) if File.endswith('.md'): - Content = markdown(Content, extensions=['attr_list']) + Content = markdown(Content, extensions=['attr_list']) # TODO: Configurable extensions? elif File.endswith('.pug'): Content = ReadFile(PagePath) HTML, HTMLContent, Description, Image = PatchHTML( @@ -447,8 +452,12 @@ def Main(Args): ResetPublic() if os.path.isdir('Pages'): shutil.copytree('Pages', 'public') + if Args.GemtextOut: + shutil.copytree('Pages', 'public.gmi', ignore=IgnoreFiles) if os.path.isdir('Posts'): shutil.copytree('Posts', 'public/Posts') + if Args.GemtextOut: + shutil.copytree('Posts', 'public.gmi/Posts', ignore=IgnoreFiles) Pages = MakeSite( TemplatesText=LoadFromDir('Templates', '*.html'), @@ -475,10 +484,12 @@ def Main(Args): Lang=SiteLang, Minify=True if Args.Minify and Args.Minify not in ('False', 'None') else False) - #HTML2Gemtext( - # Pages=Pages, - # SiteName=SiteName, - # SiteTagline=SiteTagline) + if Args.GemtextOut: + GemtextCompileList(Pages) + #HTML2Gemtext( + # Pages=Pages, + # SiteName=SiteName, + # SiteTagline=SiteTagline) DelTmp() os.system("cp -R Assets/* public/") @@ -493,6 +504,7 @@ if __name__ == '__main__': Parser.add_argument('--SiteDomain', type=str) Parser.add_argument('--SiteTagline', type=str) Parser.add_argument('--FeedEntries', type=int) + Parser.add_argument('--GemtextOut', type=bool) Parser.add_argument('--FolderRoots', type=str) Parser.add_argument('--ContextParts', type=str) Parser.add_argument('--ReservedPaths', type=str) diff --git a/Source/Modules/Feed.py b/Source/Modules/Feed.py index 0f6c3f2..6391415 100644 --- a/Source/Modules/Feed.py +++ b/Source/Modules/Feed.py @@ -7,6 +7,8 @@ | Copyright (C) 2022, OctoSpacc | | ================================= """ +# TODO: Either switch feed generation lib, or rewrite the 'lxml' module, so that no modules have to be compiled and the program is 100% portable + from Libs.feedgen.feed import FeedGenerator from Modules.Utils import * diff --git a/Source/Modules/Gemini.py b/Source/Modules/Gemini.py index c4e3b7e..5a05a54 100644 --- a/Source/Modules/Gemini.py +++ b/Source/Modules/Gemini.py @@ -7,15 +7,75 @@ | Copyright (C) 2022, OctoSpacc | | ================================= """ +# TODO: Write the Python HTML2Gemtext converter + from Libs.bs4 import BeautifulSoup from Modules.Utils import * -def HTML2Gemtext(Pages, SiteName, SiteTagline): - os.mkdir('public.gmi') +ClosedTags = ( + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + 'p', 'span', 'pre', 'code', + 'a', 'b', 'i', 'del', 'strong', + 'div', 'details', 'summary', + 'ol', 'ul', 'li', 'dl', 'dt', 'dd') +OpenTags = ( + 'img') + +def GemtextCompileList(Pages): for File, Content, Titles, Meta, HTMLContent, Description, Image in Pages: - Parse = BeautifulSoup(HTMLContent, 'html.parser') - # We should first get the most basic HTML elements, convert them to Gemtext, then replace the Gemtext in the original full HTML, and then removing

tags? - #print(File, Parse.find_all('p'), Parse.find_all('li')) + Src = 'public/{}.html.tmp'.format(StripExt(File)) + WriteFile(Src, HTMLContent) + Dst = 'public.gmi/{}.gmi'.format(StripExt(File)) + os.system('cat {} | html2gmi > {}'.format(Src, Dst)) + +def FindEarliest(Str, Items): + Pos, Item = 0, '' + for Item in Items: + Str.find(Item) + return Pos, Item + +def ParseTag(Content): + print(Content) + Parse = BeautifulSoup(str(Content), 'html.parser') + Tag = Parse.find() + +def HTML2Gemtext(Pages, SiteName, SiteTagline): + #os.mkdir('public.gmi') + for File, Content, Titles, Meta, HTMLContent, Description, Image in Pages: + Gemtext = '' + Content = HTMLContent + print(File) + while len(Content) != 0: + BlockStart = Content.find('<') + TagEnd = Content.find('>') + Parse = BeautifulSoup(Content, 'html.parser') + Tag = Parse.find() + #if Tag.name in ('a'): + # if 'href' in Tag.attrs: + # pass + for i in Tag.contents: + ParseTag(i) + if Tag.name in ('h1', 'h2', 'h3'): + Gemtext += '#' * int(Tag.name[1]) + ' ' + elif Tag.name in ('h4', 'h5', 'h6'): + Gemtext += '### ' + elif Tag.name in ('li'): + Gemtext += '* ' + Gemtext += str(Tag.get_text()) + '\n\n' + #print(File, Tag.name, len(Tag.contents)) + if Tag.name in ClosedTags: + Str = ''.format(Tag.name) + elif Tag.name in OpenTags: + Str = '>' + BlockEnd = Content.find(Str) + len(Str) + Content = Content.replace(Content[BlockStart:TagEnd], '').replace(Content[BlockEnd-len(Str):BlockEnd], '') + #print(BlockStart, TagEnd, BlockEnd, Tag.contents) + #print(Content[BlockStart:BlockEnd]) + #Gemtext += Content[BlockStart:BlockEnd] + Content = Content[BlockEnd:] + PagePath = 'public.gmi/{}.gmi'.format(StripExt(File)) + WriteFile(PagePath, Gemtext) + #exit() """ Gemtext: # h1 diff --git a/Source/Modules/Pug.py b/Source/Modules/Pug.py new file mode 100644 index 0000000..b2418d2 --- /dev/null +++ b/Source/Modules/Pug.py @@ -0,0 +1,23 @@ +""" ================================= | +| This file is part of | +| staticoso | +| Just a simple Static Site Generator | +| | +| Licensed under the AGPLv3 license | +| Copyright (C) 2022, OctoSpacc | +| ================================= """ + +# TODO: Write a native Pug parser; There is one already available for Python but seems broken / out-of-date + +import os +from Modules.Utils import * + +def PugCompileList(Pages): + # Pug-cli seems to shit itself with folder paths as input, so we pass ALL the files as arguments + Paths = '' + for File, Content, Titles, Meta in Pages: + if File.endswith('.pug'): + Path = 'public/{}'.format(File) + WriteFile(Path, Content) + Paths += '"{}" '.format(Path) + os.system('pug -P {} > /dev/null'.format(Paths))