2022-07-18 17:16:21 +02:00
""" ================================= |
| This file is part of |
| staticoso |
| Just a simple Static Site Generator |
| |
| Licensed under the AGPLv3 license |
| Copyright ( C ) 2022 , OctoSpacc |
| == == == == == == == == == == == == == == == == = """
2022-08-22 17:20:35 +02:00
import html
2022-07-18 17:16:21 +02:00
from Libs . bs4 import BeautifulSoup
from Modules . Utils import *
2022-07-29 18:12:13 +02:00
"""
ClosedTags = (
' h1 ' , ' h2 ' , ' h3 ' , ' h4 ' , ' h5 ' , ' h6 ' ,
' p ' , ' span ' , ' pre ' , ' code ' ,
' a ' , ' b ' , ' i ' , ' del ' , ' strong ' ,
' div ' , ' details ' , ' summary ' ,
' ol ' , ' ul ' , ' li ' , ' dl ' , ' dt ' , ' dd ' )
OpenTags = (
' img ' )
"""
def MkSoup(HTML):
    """Parse an HTML string into a BeautifulSoup tree.

    Uses the standard-library backed 'html.parser' tree builder, so no
    external parser is needed.

    HTML: the markup to parse.
    Returns the BeautifulSoup object for that markup.
    """
    return BeautifulSoup(HTML, 'html.parser')
def StripAttrs(HTML):
    """Return HTML with attributes removed from tags that are not links or media.

    Every tag that has neither an 'href' nor a 'src' attribute gets its
    attribute dict emptied. A tag that has either of them is left completely
    untouched, i.e. it keeps all of its attributes, not only href/src.

    HTML: the markup to process.
    Returns the processed markup serialized back to a string.
    """
    Soup = MkSoup(HTML)
    for Tag in Soup.find_all():
        if 'href' not in Tag.attrs and 'src' not in Tag.attrs:
            Tag.attrs = {}
    return str(Soup)
2022-08-14 17:35:58 +02:00
def StripTags(HTML, ToStrip): # Remove desired tags from the HTML
    """Return HTML with every element named in ToStrip replaced by ' '.

    HTML: the markup to process.
    ToStrip: container of tag names to remove. Membership is tested with
        `in`, so pass a collection of names (tuple, list, set); a plain
        string would be matched by substring.
    Returns the processed markup serialized back to a string.
    """
    Soup = MkSoup(HTML)
    for Element in Soup.find_all():
        if Element.name not in ToStrip:
            continue
        # The element is swapped for a single-space string.
        Element.replace_with(' ')
    return str(Soup)
2022-08-14 17:35:58 +02:00
2022-08-22 17:20:35 +02:00
def WriteImgAltAndTitle(HTML): # Adds alt or title attr. to <img> which only have one of them
    """Mirror 'alt' and 'title' on <img> tags that only carry one of the two.

    For each <img>: if only 'alt' is present its value is copied to 'title',
    and if only 'title' is present its value is copied to 'alt'. Images that
    have both attributes, or neither, are left unchanged.

    HTML: the markup to process.
    Returns the processed markup serialized back to a string.
    """
    Soup = MkSoup(HTML)
    for Img in Soup.find_all('img'):
        HasAlt = 'alt' in Img.attrs
        HasTitle = 'title' in Img.attrs
        if HasAlt and not HasTitle:
            Img.attrs['title'] = Img.attrs['alt']
        elif HasTitle and not HasAlt:
            Img.attrs['alt'] = Img.attrs['title']
    return str(Soup)
2022-07-29 18:12:13 +02:00
def AddToTagStartEnd(HTML, MatchStart, MatchEnd, AddStart, AddEnd): # This doesn't handle nested tags
    """Wrap the first MatchStart...MatchEnd span of HTML with extra text.

    The first occurrence of MatchStart is located; AddStart (if truthy) is
    inserted right before it. The first occurrence of MatchEnd that begins
    after the start of that match is then located; AddEnd (if truthy) is
    inserted right after it. Only this first span is handled, and nesting
    is not taken into account.

    HTML: the text to process.
    MatchStart: text marking the beginning of the span.
    MatchEnd: text marking the end of the span.
    AddStart: text to insert before MatchStart, or a falsy value for none.
    AddEnd: text to insert after MatchEnd, or a falsy value for none.

    Returns the modified text. HTML is returned unchanged when it is empty,
    when there is nothing to add, or when MatchStart is not found. If
    MatchStart is found but MatchEnd is not, only AddStart is inserted.
    """
    if not HTML or not (AddStart or AddEnd):
        return HTML

    StartPos = HTML.find(MatchStart)
    if StartPos == -1:
        return HTML
    # Compare against -1 rather than relying on truthiness: a match at
    # index 0 is a perfectly valid start position.
    EndPos = HTML.find(MatchEnd, StartPos + 1)

    # Both positions refer to the unmodified text, so the end insertion is
    # applied first; that keeps StartPos valid for the second insertion.
    if AddEnd and EndPos != -1:
        AfterEnd = EndPos + len(MatchEnd)
        HTML = HTML[:AfterEnd] + AddEnd + HTML[AfterEnd:]
    if AddStart:
        HTML = HTML[:StartPos] + AddStart + HTML[StartPos:]
    return HTML
def SquareFnrefs(HTML): # Different combinations of formatting for Soup .prettify, .encode, .decode break different page elements, don't use this for now
    """Put square brackets around footnote references inside <sup> tags.

    For every <sup> whose 'id' starts with 'fnref:', the first <a> inside it
    is replaced by a text node made of the serialized <sup> wrapped in
    square brackets. <sup> tags without such an id, or without an <a>
    inside, are left unchanged.

    HTML: the markup to process.
    Returns the tree serialized with prettify(formatter=None), i.e.
    re-indented and without entity substitution on output.
    """
    Soup = MkSoup(HTML)
    for Sup in Soup.find_all('sup'):
        if not Sup.attrs.get('id', '').startswith('fnref:'):
            continue
        Link = Sup.find('a')
        if Link is None:
            # Nothing to wrap; calling replace_with on None would raise.
            continue
        # NOTE(review): the replacement text embeds the whole <sup>, not just
        # the <a>, which nests a copy of the tag inside itself. Possibly the
        # link alone was intended - confirm before enabling this function.
        Link.replace_with(f'[{Sup}]')
    return str(Soup.prettify(formatter=None))