mirror of
https://gitlab.com/octtspacc/staticoso
synced 2025-06-05 22:09:23 +02:00
Some markdown patches
This commit is contained in:
@@ -10,27 +10,9 @@
|
||||
# TODO: Write the Python HTML2Gemtext converter
|
||||
|
||||
from Libs.bs4 import BeautifulSoup
|
||||
from Modules.HTML import *
|
||||
from Modules.Utils import *
|
||||
|
||||
"""
|
||||
ClosedTags = (
|
||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||
'p', 'span', 'pre', 'code',
|
||||
'a', 'b', 'i', 'del', 'strong',
|
||||
'div', 'details', 'summary',
|
||||
'ol', 'ul', 'li', 'dl', 'dt', 'dd')
|
||||
OpenTags = (
|
||||
'img')
|
||||
"""
|
||||
|
||||
def StripAttrs(HTML):
    """Return HTML with the attributes of most tags removed.

    Every tag found in the parsed document has its attribute dict emptied,
    except tags that carry an ``href`` or a ``src`` attribute: those keep
    all of their attributes untouched.
    """
    Document = BeautifulSoup(HTML, 'html.parser')
    for Tag in Document.find_all():
        # Links and embedded resources are left exactly as they are.
        if not ('href' in Tag.attrs or 'src' in Tag.attrs):
            Tag.attrs = {}
    return str(Document)
|
||||
|
||||
def FixGemlogDateLine(Line):
|
||||
if len(Line) >= 2 and Line[0] == '[' and Line[1].isdigit():
|
||||
Line = Line[1:]
|
||||
|
@@ -10,10 +10,60 @@
|
||||
from Libs.bs4 import BeautifulSoup
|
||||
from Modules.Utils import *
|
||||
|
||||
"""
|
||||
ClosedTags = (
|
||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||
'p', 'span', 'pre', 'code',
|
||||
'a', 'b', 'i', 'del', 'strong',
|
||||
'div', 'details', 'summary',
|
||||
'ol', 'ul', 'li', 'dl', 'dt', 'dd')
|
||||
OpenTags = (
|
||||
'img')
|
||||
"""
|
||||
|
||||
def MkSoup(HTML):
    """Parse HTML with the 'html.parser' backend and return the soup object."""
    Document = BeautifulSoup(HTML, 'html.parser')
    return Document
|
||||
|
||||
def StripAttrs(HTML):
    """Return HTML with the attributes of most tags removed.

    Every tag found in the parsed document has its attribute dict emptied,
    except tags that carry an ``href`` or a ``src`` attribute: those keep
    all of their attributes untouched.
    """
    Document = MkSoup(HTML)
    for Tag in Document.find_all():
        # Links and embedded resources are left exactly as they are.
        if not ('href' in Tag.attrs or 'src' in Tag.attrs):
            Tag.attrs = {}
    return str(Document)
|
||||
|
||||
def StripTags(HTML, ToStrip):
    """Return HTML with every tag whose name is in ToStrip removed.

    HTML is parsed once through MkSoup; each tag whose ``name`` is contained
    in ToStrip is replaced with an empty string, which drops the tag together
    with everything nested inside it.

    ToStrip is only used with the ``in`` operator, so any container of tag
    names works.
    """
    # The document used to be parsed twice here, with the first result thrown
    # away immediately; a single parse through the shared helper is enough.
    Soup = MkSoup(HTML)
    for Tag in Soup.find_all():
        if Tag.name in ToStrip:
            Tag.replace_with('')
    return str(Soup)
|
||||
|
||||
def AddToTagStartEnd(HTML, MatchStart, MatchEnd, AddStart, AddEnd):
    """Wrap the first MatchStart ... MatchEnd span of HTML with extra text.

    AddStart is inserted right before the first occurrence of MatchStart and
    AddEnd right after the first occurrence of MatchEnd that follows it.
    Only that first pair is touched; later occurrences are left alone.
    This doesn't handle nested tags.

    Parameters:
        HTML: the text to patch.
        MatchStart: text marking the beginning of the span.
        MatchEnd: text marking the end of the span.
        AddStart: text to put before the span (falsy to add nothing).
        AddEnd: text to put after the span (falsy to add nothing).

    Returns the patched text. HTML is returned unchanged when both additions
    are falsy or when MatchStart does not occur. When MatchStart occurs but no
    MatchEnd follows it, only AddStart is inserted.
    """
    if not AddStart and not AddEnd:
        return HTML

    StartPos = HTML.find(MatchStart)
    if StartPos == -1:
        return HTML

    # Positions are taken on the untouched input, so a span starting at
    # index 0 or a long AddStart cannot cause the closing marker to be missed,
    # and text inside AddStart itself is never mistaken for MatchEnd.
    EndPos = HTML.find(MatchEnd, StartPos + 1)
    Prefix = HTML[:StartPos] + (AddStart or '')

    if EndPos == -1 or not AddEnd:
        return Prefix + HTML[StartPos:]

    EndPos += len(MatchEnd)
    return Prefix + HTML[StartPos:EndPos] + AddEnd + HTML[EndPos:]
|
||||
|
||||
def SquareFnrefs(HTML):
    """Put square brackets around footnote references found in <sup> tags.

    Different combinations of formatting for Soup .prettify, .encode, .decode
    break different page elements, don't use this for now.

    For every <sup> whose ``id`` starts with 'fnref:', the first <a> inside it
    is replaced with a bracketed string. The result is the prettified document
    rendered with ``formatter=None``.
    """
    Soup = MkSoup(HTML)
    for Sup in Soup.find_all('sup'):
        if 'id' not in Sup.attrs or not Sup.attrs['id'].startswith('fnref:'):
            continue
        Anchor = Sup.find('a')
        # NOTE(review): the replacement interpolates the whole <sup> tag, not
        # just the anchor - confirm whether that is intended.
        Anchor.replace_with(f'[{Sup}]')
    return str(Soup.prettify(formatter=None))
|
||||
|
@@ -13,6 +13,8 @@ try:
|
||||
except ModuleNotFoundError:
|
||||
from Libs.markdown import markdown
|
||||
|
||||
MarkdownExtsDefault = ('attr_list', 'def_list', 'footnotes', 'markdown_del_ins', 'md_in_html', 'mdx_subscript', 'mdx_superscript', 'tables')
|
||||
|
||||
def MarkdownHTMLEscape(Str, Extensions=()): # WIP
|
||||
Text = ''
|
||||
for i,e in enumerate(Str):
|
||||
|
@@ -250,11 +250,15 @@ def CanIndex(Index, For):
|
||||
def PatchHTML(File, HTML, PartsText, ContextParts, ContextPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, SiteName, BlogName, FolderRoots, Categories, SiteLang, Locale):
|
||||
HTMLTitles = FormatTitles(Titles)
|
||||
BodyDescription, BodyImage = '', ''
|
||||
Parse = BeautifulSoup(Content, 'html.parser')
|
||||
if not BodyDescription and Parse.p:
|
||||
BodyDescription = Parse.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
|
||||
if not BodyImage and Parse.img and Parse.img['src']:
|
||||
BodyImage = Parse.img['src']
|
||||
Soup = BeautifulSoup(Content, 'html.parser')
|
||||
|
||||
if not BodyDescription and Soup.p:
|
||||
BodyDescription = Soup.p.get_text()[:150].replace('\n', ' ').replace('"', "'") + '...'
|
||||
if not BodyImage and Soup.img and Soup.img['src']:
|
||||
BodyImage = Soup.img['src']
|
||||
|
||||
#Content = SquareFnrefs(Content)
|
||||
Content = AddToTagStartEnd(Content, '<a class="footnote-ref"', '</a>', '[', ']')
|
||||
|
||||
Title = GetTitle(Meta, Titles, 'MetaTitle', BlogName)
|
||||
Description = GetDescription(Meta, BodyDescription, 'MetaDescription')
|
||||
@@ -301,14 +305,14 @@ def PatchHTML(File, HTML, PartsText, ContextParts, ContextPartsText, HTMLPagesLi
|
||||
|
||||
# TODO: Clean this doubling?
|
||||
ContentHTML = Content
|
||||
ContentHTML = ContentHTML.replace('[HTML:Site:AbsoluteRoot]', SiteRoot)
|
||||
ContentHTML = ContentHTML.replace('[HTML:Site:RelativeRoot]', GetPathLevels(PagePath))
|
||||
ContentHTML = ReplWithEsc(ContentHTML, '[HTML:Site:AbsoluteRoot]', SiteRoot)
|
||||
ContentHTML = ReplWithEsc(ContentHTML, '[HTML:Site:RelativeRoot]', GetPathLevels(PagePath))
|
||||
for e in Meta['Macros']:
|
||||
ContentHTML = ContentHTML.replace(f"[:{e}:]", Meta['Macros'][e])
|
||||
ContentHTML = ReplWithEsc(ContentHTML, f"[:{e}:]", Meta['Macros'][e])
|
||||
for e in FolderRoots:
|
||||
ContentHTML = ContentHTML.replace(f"[HTML:Folder:{e}:AbsoluteRoot]", FolderRoots[e])
|
||||
ContentHTML = ReplWithEsc(ContentHTML, f"[HTML:Folder:{e}:AbsoluteRoot]", FolderRoots[e])
|
||||
for e in Categories:
|
||||
ContentHTML = ContentHTML.replace(f"<span>[HTML:Category:{e}]</span>", Categories[e])
|
||||
ContentHTML = ReplWithEsc(ContentHTML, f"<span>[HTML:Category:{e}]</span>", Categories[e])
|
||||
SlimHTML = HTMLPagesList + ContentHTML
|
||||
|
||||
return HTML, ContentHTML, SlimHTML, Description, Image
|
||||
|
@@ -99,6 +99,9 @@ def RevSort(List):
|
||||
List.reverse()
|
||||
return List
|
||||
|
||||
def FirstRealItem(List):
    """Return the first truthy element of List.

    Raises StopIteration when List holds no truthy element.
    """
    # filter(None, ...) keeps only truthy items; next() takes the first one.
    return next(filter(None, List))
|
||||
|
||||
def GetFullDate(Date):
|
||||
if not Date:
|
||||
return None
|
||||
|
Reference in New Issue
Block a user