mirror of https://gitlab.com/octtspacc/staticoso
Some markdown patches
This commit is contained in:
parent
f62a26eb33
commit
fb02893d21
|
@ -81,7 +81,7 @@ def Main(Args, FeedEntries):
|
||||||
Locale = LoadLocale(SiteLang)
|
Locale = LoadLocale(SiteLang)
|
||||||
MastodonURL = Args.MastodonURL if Args.MastodonURL else ''
|
MastodonURL = Args.MastodonURL if Args.MastodonURL else ''
|
||||||
MastodonToken = Args.MastodonToken if Args.MastodonToken else ''
|
MastodonToken = Args.MastodonToken if Args.MastodonToken else ''
|
||||||
MarkdownExts = literal_eval(Args.MarkdownExts) if Args.MarkdownExts else EvalOpt(ReadConf(SiteConf, 'Site', 'MarkdownExts')) if ReadConf(SiteConf, 'Site', 'MarkdownExts') else ('attr_list', 'def_list', 'markdown_del_ins', 'md_in_html', 'mdx_subscript', 'mdx_superscript', 'tables')
|
MarkdownExts = literal_eval(Args.MarkdownExts) if Args.MarkdownExts else EvalOpt(ReadConf(SiteConf, 'Site', 'MarkdownExts')) if ReadConf(SiteConf, 'Site', 'MarkdownExts') else MarkdownExtsDefault
|
||||||
ActivityPubTypeFilter = Args.ActivityPubTypeFilter if Args.ActivityPubTypeFilter else 'Post'
|
ActivityPubTypeFilter = Args.ActivityPubTypeFilter if Args.ActivityPubTypeFilter else 'Post'
|
||||||
FeedCategoryFilter = Args.FeedCategoryFilter if Args.FeedCategoryFilter else 'Blog'
|
FeedCategoryFilter = Args.FeedCategoryFilter if Args.FeedCategoryFilter else 'Blog'
|
||||||
Minify = StringBoolChoose(False, Args.Minify, ReadConf(SiteConf, 'Site', 'Minify'))
|
Minify = StringBoolChoose(False, Args.Minify, ReadConf(SiteConf, 'Site', 'Minify'))
|
||||||
|
|
|
@ -166,8 +166,9 @@ class FootnoteExtension(Extension):
|
||||||
|
|
||||||
div = etree.Element("div")
|
div = etree.Element("div")
|
||||||
div.set('class', 'footnote')
|
div.set('class', 'footnote')
|
||||||
etree.SubElement(div, "hr")
|
# etree.SubElement(div, "hr")
|
||||||
ol = etree.SubElement(div, "ol")
|
ol = etree.SubElement(div, "ol")
|
||||||
|
# ol = etree.Element("ol")
|
||||||
surrogate_parent = etree.Element("div")
|
surrogate_parent = etree.Element("div")
|
||||||
|
|
||||||
for index, id in enumerate(self.footnotes.keys(), start=1):
|
for index, id in enumerate(self.footnotes.keys(), start=1):
|
||||||
|
@ -198,6 +199,7 @@ class FootnoteExtension(Extension):
|
||||||
p = etree.SubElement(li, "p")
|
p = etree.SubElement(li, "p")
|
||||||
p.append(backlink)
|
p.append(backlink)
|
||||||
return div
|
return div
|
||||||
|
# return ol
|
||||||
|
|
||||||
|
|
||||||
class FootnoteBlockProcessor(BlockProcessor):
|
class FootnoteBlockProcessor(BlockProcessor):
|
||||||
|
|
|
@ -10,27 +10,9 @@
|
||||||
# TODO: Write the Python HTML2Gemtext converter
|
# TODO: Write the Python HTML2Gemtext converter
|
||||||
|
|
||||||
from Libs.bs4 import BeautifulSoup
|
from Libs.bs4 import BeautifulSoup
|
||||||
|
from Modules.HTML import *
|
||||||
from Modules.Utils import *
|
from Modules.Utils import *
|
||||||
|
|
||||||
"""
|
|
||||||
ClosedTags = (
|
|
||||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
|
||||||
'p', 'span', 'pre', 'code',
|
|
||||||
'a', 'b', 'i', 'del', 'strong',
|
|
||||||
'div', 'details', 'summary',
|
|
||||||
'ol', 'ul', 'li', 'dl', 'dt', 'dd')
|
|
||||||
OpenTags = (
|
|
||||||
'img')
|
|
||||||
"""
|
|
||||||
|
|
||||||
def StripAttrs(HTML):
    """Clear the attributes of every tag that has neither ``href`` nor ``src``.

    A tag carrying ``href`` or ``src`` is left exactly as it is, including
    any other attributes it has. The parsed document is returned as a string.
    """
    Soup = BeautifulSoup(HTML, 'html.parser')
    Keep = ('href', 'src')
    for Tag in Soup.find_all():
        if not any(Name in Tag.attrs for Name in Keep):
            Tag.attrs = {}
    return str(Soup)
|
|
||||||
|
|
||||||
def FixGemlogDateLine(Line):
|
def FixGemlogDateLine(Line):
|
||||||
if len(Line) >= 2 and Line[0] == '[' and Line[1].isdigit():
|
if len(Line) >= 2 and Line[0] == '[' and Line[1].isdigit():
|
||||||
Line = Line[1:]
|
Line = Line[1:]
|
||||||
|
|
|
@ -10,10 +10,60 @@
|
||||||
from Libs.bs4 import BeautifulSoup
|
from Libs.bs4 import BeautifulSoup
|
||||||
from Modules.Utils import *
|
from Modules.Utils import *
|
||||||
|
|
||||||
|
"""
|
||||||
|
ClosedTags = (
|
||||||
|
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||||
|
'p', 'span', 'pre', 'code',
|
||||||
|
'a', 'b', 'i', 'del', 'strong',
|
||||||
|
'div', 'details', 'summary',
|
||||||
|
'ol', 'ul', 'li', 'dl', 'dt', 'dd')
|
||||||
|
OpenTags = (
|
||||||
|
'img')
|
||||||
|
"""
|
||||||
|
|
||||||
|
def MkSoup(HTML):
    """Parse an HTML string with the 'html.parser' backend and return the BeautifulSoup tree."""
    Parser = 'html.parser'
    return BeautifulSoup(HTML, Parser)
|
||||||
|
|
||||||
|
def StripAttrs(HTML):
    """Clear the attributes of every tag that has neither ``href`` nor ``src``.

    Tags that do carry an ``href`` or ``src`` attribute are skipped entirely,
    so they keep all of their attributes. Returns the document serialised
    back to a string.
    """
    Soup = MkSoup(HTML)
    for Tag in Soup.find_all():
        if 'href' in Tag.attrs or 'src' in Tag.attrs:
            continue
        Tag.attrs = {}
    return str(Soup)
|
||||||
|
|
||||||
def StripTags(HTML, ToStrip):
    """Remove every tag whose name is ``in`` ToStrip.

    Each matching tag is replaced in the parsed tree by an empty string, so
    the tag and everything inside it disappear from the output. Returns the
    document serialised back to a string.
    """
    Soup = MkSoup(HTML)
    for Tag in Soup.find_all():
        if Tag.name not in ToStrip:
            continue
        Tag.replace_with('')
    return str(Soup)
|
||||||
|
|
||||||
|
def AddToTagStartEnd(HTML, MatchStart, MatchEnd, AddStart, AddEnd): # This doesn't handle nested tags
    """Insert text around the first MatchStart ... MatchEnd span found in HTML.

    AddStart is inserted immediately before the first occurrence of
    MatchStart. AddEnd is inserted immediately after the first occurrence of
    MatchEnd that begins after that MatchStart position.

    Parameters:
        HTML: the string to patch.
        MatchStart: literal text marking the start of the span.
        MatchEnd: literal text marking the end of the span.
        AddStart: text to put before MatchStart; falsy to add nothing.
        AddEnd: text to put after MatchEnd; falsy to add nothing.

    Returns:
        The patched string, or HTML unchanged when there is nothing to add
        or MatchStart does not occur. If MatchStart occurs but no MatchEnd
        follows it, only AddStart is inserted.

    NOTE(review): only the FIRST span is wrapped; later occurrences are left
    untouched. Confirm whether callers expect every occurrence to be wrapped.
    """
    if not AddStart and not AddEnd:
        return HTML
    StartPos = HTML.find(MatchStart)
    if StartPos == -1:
        return HTML
    if AddStart:
        HTML = HTML[:StartPos] + AddStart + HTML[StartPos:]
        # Keep StartPos pointing at MatchStart inside the lengthened string.
        StartPos += len(AddStart)
    if AddEnd:
        # Compare against -1 explicitly: index 0 is a valid match position.
        EndPos = HTML.find(MatchEnd, StartPos + 1)
        if EndPos != -1:
            EndPos += len(MatchEnd)
            HTML = HTML[:EndPos] + AddEnd + HTML[EndPos:]
    return HTML
|
||||||
|
|
||||||
|
def SquareFnrefs(HTML): # Different combinations of formatting for Soup .prettify, .encode, .decode break different page elements, don't use this for now
    """Put square brackets around the link of each footnote reference.

    For every ``<sup>`` whose ``id`` starts with ``fnref:``, the first ``<a>``
    inside it is replaced by a text node holding that anchor's own markup
    between ``[`` and ``]``. The document is then serialised with
    ``prettify(formatter=None)``, which is expected to emit that text without
    escaping it.

    A matching ``<sup>`` that contains no ``<a>`` is left unchanged.

    Returns the prettified document as a string. See the caution on the
    ``def`` line: the prettified output is known to disturb other page
    elements.
    """
    Soup = MkSoup(HTML)
    for Sup in Soup.find_all('sup'):
        if not Sup.attrs.get('id', '').startswith('fnref:'):
            continue
        Link = Sup.find('a')
        if Link is None:
            continue
        # Wrap the anchor itself; interpolating the whole <sup> would nest a
        # duplicate <sup> with the same id inside this one.
        Link.replace_with(f'[{Link}]')
    return str(Soup.prettify(formatter=None))
|
||||||
|
|
|
@ -13,6 +13,8 @@ try:
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
from Libs.markdown import markdown
|
from Libs.markdown import markdown
|
||||||
|
|
||||||
|
# Markdown extension names used as the fallback when MarkdownExts is given neither on the command line nor in the site configuration.
MarkdownExtsDefault = ('attr_list', 'def_list', 'footnotes', 'markdown_del_ins', 'md_in_html', 'mdx_subscript', 'mdx_superscript', 'tables')
|
||||||
|
|
||||||
def MarkdownHTMLEscape(Str, Extensions=()): # WIP
|
def MarkdownHTMLEscape(Str, Extensions=()): # WIP
|
||||||
Text = ''
|
Text = ''
|
||||||
for i,e in enumerate(Str):
|
for i,e in enumerate(Str):
|
||||||
|
|
|
@ -250,11 +250,15 @@ def CanIndex(Index, For):
|
||||||
def PatchHTML(File, HTML, PartsText, ContextParts, ContextPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale):
|
def PatchHTML(File, HTML, PartsText, ContextParts, ContextPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale):
|
||||||
HTMLTitles = FormatTitles(Titles)
|
HTMLTitles = FormatTitles(Titles)
|
||||||
BodyDescription, BodyImage = '', ''
|
BodyDescription, BodyImage = '', ''
|
||||||
Parse = BeautifulSoup(Content, 'html.parser')
|
Soup = BeautifulSoup(Content, 'html.parser')
|
||||||
if not BodyDescription and Parse.p:
|
|
||||||
BodyDescription = Parse.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
|
if not BodyDescription and Soup.p:
|
||||||
if not BodyImage and Parse.img and Parse.img['src']:
|
BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
|
||||||
BodyImage = Parse.img['src']
|
if not BodyImage and Soup.img and Soup.img['src']:
|
||||||
|
BodyImage = Soup.img['src']
|
||||||
|
|
||||||
|
#Content = SquareFnrefs(Content)
|
||||||
|
Content = AddToTagStartEnd(Content, '<a class="footnote-ref"', '</a>', '[', ']')
|
||||||
|
|
||||||
Title = GetTitle(Meta, Titles, 'MetaTitle', BlogName)
|
Title = GetTitle(Meta, Titles, 'MetaTitle', BlogName)
|
||||||
Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
|
Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
|
||||||
|
@ -301,14 +305,14 @@ def PatchHTML(File, HTML, PartsText, ContextParts, ContextPartsText, HTMLPagesLi
|
||||||
|
|
||||||
# TODO: Clean this doubling?
|
# TODO: Clean this doubling?
|
||||||
ContentHTML = Content
|
ContentHTML = Content
|
||||||
ContentHTML = ContentHTML.replace('[HTML:Site:AbsoluteRoot]', SiteRoot)
|
ContentHTML = ReplWithEsc(ContentHTML, '[HTML:Site:AbsoluteRoot]', SiteRoot)
|
||||||
ContentHTML = ContentHTML.replace('[HTML:Site:RelativeRoot]', GetPathLevels(PagePath))
|
ContentHTML = ReplWithEsc(ContentHTML, '[HTML:Site:RelativeRoot]', GetPathLevels(PagePath))
|
||||||
for e in Meta['Macros']:
|
for e in Meta['Macros']:
|
||||||
ContentHTML = ContentHTML.replace(f"[:{e}:]", Meta['Macros'][e])
|
ContentHTML = ReplWithEsc(ContentHTML, f"[:{e}:]", Meta['Macros'][e])
|
||||||
for e in FolderRoots:
|
for e in FolderRoots:
|
||||||
ContentHTML = ContentHTML.replace(f"[HTML:Folder:{e}:AbsoluteRoot]", FolderRoots[e])
|
ContentHTML = ReplWithEsc(ContentHTML, f"[HTML:Folder:{e}:AbsoluteRoot]", FolderRoots[e])
|
||||||
for e in Categories:
|
for e in Categories:
|
||||||
ContentHTML = ContentHTML.replace(f"<span>[HTML:Category:{e}]</span>", Categories[e])
|
ContentHTML = ReplWithEsc(ContentHTML, f"<span>[HTML:Category:{e}]</span>", Categories[e])
|
||||||
SlimHTML = HTMLPagesList + ContentHTML
|
SlimHTML = HTMLPagesList + ContentHTML
|
||||||
|
|
||||||
return HTML, ContentHTML, SlimHTML, Description, Image
|
return HTML, ContentHTML, SlimHTML, Description, Image
|
||||||
|
|
|
@ -99,6 +99,9 @@ def RevSort(List):
|
||||||
List.reverse()
|
List.reverse()
|
||||||
return List
|
return List
|
||||||
|
|
||||||
|
def FirstRealItem(List):
    """Return the first truthy element of List, or None if there is none.

    A default is passed to ``next`` so that an empty or all-falsy input does
    not leak ``StopIteration`` out of this function.
    """
    return next((Item for Item in List if Item), None)
|
||||||
|
|
||||||
def GetFullDate(Date):
|
def GetFullDate(Date):
|
||||||
if not Date:
|
if not Date:
|
||||||
return None
|
return None
|
||||||
|
|
1
TODO
1
TODO
|
@ -1,5 +1,6 @@
|
||||||
- Fix arguments - some are only callable from CLI and not Site.ini
|
- Fix arguments - some are only callable from CLI and not Site.ini
|
||||||
- Fix Python-Markdown is installed problem (to load our modules)
|
- Fix Python-Markdown is installed problem (to load our modules)
|
||||||
|
- Postprocessing markdown footnotes to add brackets
|
||||||
- Hot-recompile
|
- Hot-recompile
|
||||||
- Differential recompile
|
- Differential recompile
|
||||||
- Feed generation without native libraries
|
- Feed generation without native libraries
|
||||||
|
|
Loading…
Reference in New Issue