mirror of
https://gitlab.com/octtspacc/staticoso
synced 2025-03-13 17:50:04 +01:00
Working but really not Gemtext generation
This commit is contained in:
parent
fc3c0b8be6
commit
1962a808bc
@ -17,8 +17,11 @@ Feel free to experiment with all of this stuff!
|
|||||||
- (Included) [htmlmin == 0.1.12](https://pypi.org/project/htmlmin)
|
- (Included) [htmlmin == 0.1.12](https://pypi.org/project/htmlmin)
|
||||||
- [node == 12.22.5](https://nodejs.org) - [npm == 7.5.2](https://www.npmjs.com)
|
- [node == 12.22.5](https://nodejs.org) - [npm == 7.5.2](https://www.npmjs.com)
|
||||||
- (Included) [pug-cli == 1.0.0-alpha6](https://npmjs.com/package/pug-cli)
|
- (Included) [pug-cli == 1.0.0-alpha6](https://npmjs.com/package/pug-cli)
|
||||||
|
- [Go](https://go.dev)
|
||||||
|
- [html2gmi](https://github.com/LukeEmmet/html2gmi)
|
||||||
|
|
||||||
## Features roadmap
|
## Features roadmap
|
||||||
|
- [ ] Polished Gemtext generation
|
||||||
- [x] Autodetection of pages and posts
|
- [x] Autodetection of pages and posts
|
||||||
- [x] Info for posts shown on their page
|
- [x] Info for posts shown on their page
|
||||||
- [x] HTML minification
|
- [x] HTML minification
|
||||||
|
@ -13,13 +13,19 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
from ast import literal_eval
|
from ast import literal_eval
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Our local Markdown patches conflict if the module is installed on the system, so first try to import from system
|
||||||
|
try:
|
||||||
|
from markdown import markdown
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
from Libs.markdown import markdown
|
||||||
|
|
||||||
from Libs import htmlmin
|
from Libs import htmlmin
|
||||||
from Libs.bs4 import BeautifulSoup
|
from Libs.bs4 import BeautifulSoup
|
||||||
from Libs.markdown import Markdown
|
|
||||||
from Libs.markdown import markdown
|
|
||||||
from pathlib import Path
|
|
||||||
from Modules.Feed import *
|
from Modules.Feed import *
|
||||||
from Modules.Gemini import *
|
from Modules.Gemini import *
|
||||||
|
from Modules.Pug import *
|
||||||
from Modules.Utils import *
|
from Modules.Utils import *
|
||||||
|
|
||||||
Extensions = {
|
Extensions = {
|
||||||
@ -101,7 +107,11 @@ def FormatTitles(Titles):
|
|||||||
DashyTitles += [DashyTitle]
|
DashyTitles += [DashyTitle]
|
||||||
Title = '[{}](#{})'.format(Title, DashyTitle)
|
Title = '[{}](#{})'.format(Title, DashyTitle)
|
||||||
MDTitles += Heading + Title + '\n'
|
MDTitles += Heading + Title + '\n'
|
||||||
return Markdown().convert(MDTitles)
|
return markdown(MDTitles)
|
||||||
|
|
||||||
|
# https://stackoverflow.com/a/15664273
def IgnoreFiles(Dir, Files):
    """Ignore callback for shutil.copytree that skips every regular file.

    copytree calls this with the directory being visited and the names it
    contains, and skips whatever names are returned. Returning only the
    regular files means just the directory tree is replicated.

    Dir: path of the directory currently being copied.
    Files: names of the entries inside Dir.
    Returns: the names from Files that are regular files in Dir.
    """
    return [f for f in Files if os.path.isfile(os.path.join(Dir, f))]
|
||||||
|
|
||||||
def LoadFromDir(Dir, Rglob):
|
def LoadFromDir(Dir, Rglob):
|
||||||
Contents = {}
|
Contents = {}
|
||||||
@ -169,16 +179,6 @@ def PreProcessor(Path, SiteRoot):
|
|||||||
Content += l + '\n'
|
Content += l + '\n'
|
||||||
return Content, Titles, Meta
|
return Content, Titles, Meta
|
||||||
|
|
||||||
def PugCompileList(Pages):
    """Write every .pug page under public/ and compile them all with pug-cli.

    Pages: iterable of (File, Content, Titles, Meta) tuples; only entries
        whose File ends in '.pug' are processed.
    Returns: None.
    """
    import subprocess

    # pug-cli appears to fail when given folder paths as input, so every
    # file is passed as an explicit argument instead.
    Paths = []
    for File, Content, Titles, Meta in Pages:
        if File.endswith('.pug'):
            FilePath = 'public/{}'.format(File)
            WriteFile(FilePath, Content)
            Paths.append(FilePath)
    if not Paths:
        # Nothing to compile: do not start pug without input files.
        return
    try:
        # An argument list (no shell) keeps file names containing quotes,
        # spaces or shell metacharacters intact.
        subprocess.run(['pug', '-P'] + Paths, stdout=subprocess.DEVNULL)
    except FileNotFoundError:
        print('[E] pug executable not found; .pug pages were not compiled.')
|
|
||||||
|
|
||||||
def MakeContentHeader(Meta, Locale, Categories=''):
|
def MakeContentHeader(Meta, Locale, Categories=''):
|
||||||
Header = ''
|
Header = ''
|
||||||
if Meta['Type'] == 'Post':
|
if Meta['Type'] == 'Post':
|
||||||
@ -187,7 +187,7 @@ def MakeContentHeader(Meta, Locale, Categories=''):
|
|||||||
Header += '{} {} \n'.format(Locale[i], Meta[i])
|
Header += '{} {} \n'.format(Locale[i], Meta[i])
|
||||||
if Categories:
|
if Categories:
|
||||||
Header += '{}: {} \n'.format(Locale['Categories'], Categories)
|
Header += '{}: {} \n'.format(Locale['Categories'], Categories)
|
||||||
return Markdown().convert(Header)
|
return markdown(Header)
|
||||||
|
|
||||||
def MakeCategoryLine(Meta, Reserved):
|
def MakeCategoryLine(Meta, Reserved):
|
||||||
Categories = ''
|
Categories = ''
|
||||||
@ -243,10 +243,13 @@ def PatchHTML(Base, PartsText, ContextParts, ContextPartsText, HTMLPagesList, Pa
|
|||||||
for i in Categories:
|
for i in Categories:
|
||||||
Base = Base.replace('<span>[HTML:Category:{}]</span>'.format(i), Categories[i])
|
Base = Base.replace('<span>[HTML:Category:{}]</span>'.format(i), Categories[i])
|
||||||
|
|
||||||
|
# TODO: Clean this doubling?
|
||||||
Content = Content.replace('[HTML:Site:AbsoluteRoot]', SiteRoot)
|
Content = Content.replace('[HTML:Site:AbsoluteRoot]', SiteRoot)
|
||||||
Content = Content.replace('[HTML:Site:RelativeRoot]', GetLevels(PagePath))
|
Content = Content.replace('[HTML:Site:RelativeRoot]', GetLevels(PagePath))
|
||||||
for i in FolderRoots:
|
for i in FolderRoots:
|
||||||
Content = Content.replace('[HTML:Folder:{}:AbsoluteRoot]'.format(i), FolderRoots[i])
|
Content = Content.replace('[HTML:Folder:{}:AbsoluteRoot]'.format(i), FolderRoots[i])
|
||||||
|
for i in Categories:
|
||||||
|
Content = Content.replace('<span>[HTML:Category:{}]</span>'.format(i), Categories[i])
|
||||||
|
|
||||||
return Base, Content, Description, Image
|
return Base, Content, Description, Image
|
||||||
|
|
||||||
@ -311,12 +314,14 @@ def GetHTMLPagesList(Pages, SiteRoot, PathPrefix, Type='Page', Category=None, Fo
|
|||||||
Levels = '- ' * n
|
Levels = '- ' * n
|
||||||
Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, PathPrefix)
|
Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, PathPrefix)
|
||||||
List += Levels + Title + '\n'
|
List += Levels + Title + '\n'
|
||||||
return Markdown().convert(List)
|
return markdown(List)
|
||||||
|
|
||||||
def DelTmp():
    """Delete intermediate files left under public/.

    Removes every file whose extension is listed in Extensions['Pages'],
    then every '*.tmp' file, searching public/ recursively.
    """
    for Ext in Extensions['Pages']:
        for File in Path('public').rglob('*.{}'.format(Ext)):
            os.remove(File)
    # NOTE(review): nesting reconstructed from a flattened diff; the '*.tmp'
    # sweep is assumed to run once, after the per-extension loop - confirm.
    for File in Path('public').rglob('*.tmp'):
        os.remove(File)
|
||||||
|
|
||||||
def RevSort(List):
|
def RevSort(List):
|
||||||
List.sort()
|
List.sort()
|
||||||
@ -394,7 +399,7 @@ def MakeSite(TemplatesText, PartsText, ContextParts, ContextPartsText, SiteName,
|
|||||||
For='Menu')
|
For='Menu')
|
||||||
PagePath = 'public/{}.html'.format(StripExt(File))
|
PagePath = 'public/{}.html'.format(StripExt(File))
|
||||||
if File.endswith('.md'):
|
if File.endswith('.md'):
|
||||||
Content = markdown(Content, extensions=['attr_list'])
|
Content = markdown(Content, extensions=['attr_list']) # TODO: Configurable extensions?
|
||||||
elif File.endswith('.pug'):
|
elif File.endswith('.pug'):
|
||||||
Content = ReadFile(PagePath)
|
Content = ReadFile(PagePath)
|
||||||
HTML, HTMLContent, Description, Image = PatchHTML(
|
HTML, HTMLContent, Description, Image = PatchHTML(
|
||||||
@ -447,8 +452,12 @@ def Main(Args):
|
|||||||
ResetPublic()
|
ResetPublic()
|
||||||
if os.path.isdir('Pages'):
|
if os.path.isdir('Pages'):
|
||||||
shutil.copytree('Pages', 'public')
|
shutil.copytree('Pages', 'public')
|
||||||
|
if Args.GemtextOut:
|
||||||
|
shutil.copytree('Pages', 'public.gmi', ignore=IgnoreFiles)
|
||||||
if os.path.isdir('Posts'):
|
if os.path.isdir('Posts'):
|
||||||
shutil.copytree('Posts', 'public/Posts')
|
shutil.copytree('Posts', 'public/Posts')
|
||||||
|
if Args.GemtextOut:
|
||||||
|
shutil.copytree('Posts', 'public.gmi/Posts', ignore=IgnoreFiles)
|
||||||
|
|
||||||
Pages = MakeSite(
|
Pages = MakeSite(
|
||||||
TemplatesText=LoadFromDir('Templates', '*.html'),
|
TemplatesText=LoadFromDir('Templates', '*.html'),
|
||||||
@ -475,6 +484,8 @@ def Main(Args):
|
|||||||
Lang=SiteLang,
|
Lang=SiteLang,
|
||||||
Minify=True if Args.Minify and Args.Minify not in ('False', 'None') else False)
|
Minify=True if Args.Minify and Args.Minify not in ('False', 'None') else False)
|
||||||
|
|
||||||
|
if Args.GemtextOut:
|
||||||
|
GemtextCompileList(Pages)
|
||||||
#HTML2Gemtext(
|
#HTML2Gemtext(
|
||||||
# Pages=Pages,
|
# Pages=Pages,
|
||||||
# SiteName=SiteName,
|
# SiteName=SiteName,
|
||||||
@ -493,6 +504,7 @@ if __name__ == '__main__':
|
|||||||
Parser.add_argument('--SiteDomain', type=str)
|
Parser.add_argument('--SiteDomain', type=str)
|
||||||
Parser.add_argument('--SiteTagline', type=str)
|
Parser.add_argument('--SiteTagline', type=str)
|
||||||
Parser.add_argument('--FeedEntries', type=int)
|
Parser.add_argument('--FeedEntries', type=int)
|
||||||
|
Parser.add_argument('--GemtextOut', type=bool)
|
||||||
Parser.add_argument('--FolderRoots', type=str)
|
Parser.add_argument('--FolderRoots', type=str)
|
||||||
Parser.add_argument('--ContextParts', type=str)
|
Parser.add_argument('--ContextParts', type=str)
|
||||||
Parser.add_argument('--ReservedPaths', type=str)
|
Parser.add_argument('--ReservedPaths', type=str)
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
| Copyright (C) 2022, OctoSpacc |
|
| Copyright (C) 2022, OctoSpacc |
|
||||||
| ================================= """
|
| ================================= """
|
||||||
|
|
||||||
|
# TODO: Either switch feed generation lib, or rewrite the 'lxml' module, so that no modules have to be compiled and the program is 100% portable
|
||||||
|
|
||||||
from Libs.feedgen.feed import FeedGenerator
|
from Libs.feedgen.feed import FeedGenerator
|
||||||
from Modules.Utils import *
|
from Modules.Utils import *
|
||||||
|
|
||||||
|
@ -7,15 +7,75 @@
|
|||||||
| Copyright (C) 2022, OctoSpacc |
|
| Copyright (C) 2022, OctoSpacc |
|
||||||
| ================================= """
|
| ================================= """
|
||||||
|
|
||||||
|
# TODO: Write the Python HTML2Gemtext converter
|
||||||
|
|
||||||
from Libs.bs4 import BeautifulSoup
|
from Libs.bs4 import BeautifulSoup
|
||||||
from Modules.Utils import *
|
from Modules.Utils import *
|
||||||
|
|
||||||
# Tags that have an explicit closing tag (</name>).
ClosedTags = (
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'p', 'span', 'pre', 'code',
    'a', 'b', 'i', 'del', 'strong',
    'div', 'details', 'summary',
    'ol', 'ul', 'li', 'dl', 'dt', 'dd')
# Tags without a closing tag. The trailing comma matters: ('img') is just the
# string 'img', and `name in 'img'` would be a substring test.
OpenTags = (
    'img',)

def GemtextCompileList(Pages):
    """Convert each page's HTML content to Gemtext with the html2gmi tool.

    For every page the HTML is written to public/<name>.html.tmp and piped
    through html2gmi, whose output is stored in public.gmi/<name>.gmi.

    Pages: iterable of (File, Content, Titles, Meta, HTMLContent,
        Description, Image) tuples.
    Returns: None.
    """
    import subprocess

    for File, Content, Titles, Meta, HTMLContent, Description, Image in Pages:
        Src = 'public/{}.html.tmp'.format(StripExt(File))
        WriteFile(Src, HTMLContent)
        Dst = 'public.gmi/{}.gmi'.format(StripExt(File))
        # Feed the file through stdin/stdout directly instead of building a
        # shell pipeline, so paths with spaces or shell metacharacters work.
        with open(Src, 'rb') as In, open(Dst, 'wb') as Out:
            try:
                subprocess.run(['html2gmi'], stdin=In, stdout=Out)
            except FileNotFoundError:
                print('[E] html2gmi executable not found; Gemtext output was not generated.')
                return
|
||||||
|
|
||||||
|
def FindEarliest(Str, Items):
    """Find which of Items occurs first in Str.

    Str: the string to search.
    Items: iterable of substrings to look for.
    Returns: (Pos, Item), where Pos is the lowest index at which any item
        occurs and Item is the substring found there. When several items
        share that index, the one listed first in Items wins. Returns
        (-1, '') when Items is empty or none of them occur in Str.
    """
    Pos, Found = -1, ''
    for Item in Items:
        Index = Str.find(Item)
        # Keep the match only if it exists and is strictly earlier than the
        # best one seen so far.
        if Index != -1 and (Pos == -1 or Index < Pos):
            Pos, Found = Index, Item
    return Pos, Found
|
||||||
|
|
||||||
|
def ParseTag(Content):
    """Debugging stub: print a node and parse it with BeautifulSoup.

    Content: a node (or string) taken from a parsed document; it is
        converted with str() before being parsed again.
    Returns: None. The first parsed tag is looked up but not used yet.
    """
    print(Content)
    Parse = BeautifulSoup(str(Content), 'html.parser')
    Tag = Parse.find()
|
||||||
|
|
||||||
|
def HTML2Gemtext(Pages, SiteName, SiteTagline):
    """Experimental native HTML to Gemtext conversion (work in progress).

    For every page, repeatedly takes the first element of the remaining
    HTML, appends its text to the Gemtext output (with a heading or list
    prefix where applicable), then drops the consumed markup. The result is
    written to public.gmi/<name>.gmi.

    Pages: iterable of (File, Content, Titles, Meta, HTMLContent,
        Description, Image) tuples.
    SiteName, SiteTagline: currently unused.
    Returns: None.
    """
    for File, Content, Titles, Meta, HTMLContent, Description, Image in Pages:
        Gemtext = ''
        Content = HTMLContent
        print(File)
        while len(Content) != 0:
            BlockStart = Content.find('<')
            TagEnd = Content.find('>')
            Parse = BeautifulSoup(Content, 'html.parser')
            Tag = Parse.find()
            if Tag is None:
                # No element left in the remaining text: stop instead of
                # failing on attribute access.
                break
            # TODO: handle links (<a href=...>)
            for i in Tag.contents:
                ParseTag(i)
            if Tag.name in ('h1', 'h2', 'h3'):
                Gemtext += '#' * int(Tag.name[1]) + ' '
            elif Tag.name in ('h4', 'h5', 'h6'):
                # Gemtext has only three heading levels.
                Gemtext += '### '
            elif Tag.name == 'li':
                # Equality, not `in ('li')`: the latter is a substring test
                # that also matches tags such as <i>.
                Gemtext += '* '
            Gemtext += str(Tag.get_text()) + '\n\n'
            if Tag.name in ClosedTags:
                Str = '</{}>'.format(Tag.name)
            else:
                # Void tags, and any tag not listed in ClosedTags, are only
                # skipped up to the end of their opening tag, so Str is
                # always bound for the current tag.
                Str = '>'
            StrPos = Content.find(Str)
            if StrPos == -1:
                # Terminator missing (malformed markup): stop rather than
                # slicing at a bogus offset.
                break
            BlockEnd = StrPos + len(Str)
            Content = Content.replace(Content[BlockStart:TagEnd], '').replace(Content[BlockEnd-len(Str):BlockEnd], '')
            Content = Content[BlockEnd:]
        PagePath = 'public.gmi/{}.gmi'.format(StripExt(File))
        WriteFile(PagePath, Gemtext)
|
||||||
|
|
||||||
""" Gemtext:
|
""" Gemtext:
|
||||||
# h1
|
# h1
|
||||||
|
23
Source/Modules/Pug.py
Normal file
23
Source/Modules/Pug.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
""" ================================= |
|
||||||
|
| This file is part of |
|
||||||
|
| staticoso |
|
||||||
|
| Just a simple Static Site Generator |
|
||||||
|
| |
|
||||||
|
| Licensed under the AGPLv3 license |
|
||||||
|
| Copyright (C) 2022, OctoSpacc |
|
||||||
|
| ================================= """
|
||||||
|
|
||||||
|
# TODO: Write a native Pug parser; There is one already available for Python but seems broken / out-of-date
|
||||||
|
|
||||||
|
import os
|
||||||
|
from Modules.Utils import *
|
||||||
|
|
||||||
|
def PugCompileList(Pages):
    """Write every .pug page under public/ and compile them all with pug-cli.

    Pages: iterable of (File, Content, Titles, Meta) tuples; only entries
        whose File ends in '.pug' are processed.
    Returns: None.
    """
    import subprocess

    # pug-cli appears to fail when given folder paths as input, so every
    # file is passed as an explicit argument instead.
    Paths = []
    for File, Content, Titles, Meta in Pages:
        if File.endswith('.pug'):
            FilePath = 'public/{}'.format(File)
            WriteFile(FilePath, Content)
            Paths.append(FilePath)
    if not Paths:
        # Nothing to compile: do not start pug without input files.
        return
    try:
        # An argument list (no shell) keeps file names containing quotes,
        # spaces or shell metacharacters intact.
        subprocess.run(['pug', '-P'] + Paths, stdout=subprocess.DEVNULL)
    except FileNotFoundError:
        print('[E] pug executable not found; .pug pages were not compiled.')
|
Loading…
x
Reference in New Issue
Block a user