2022-07-18 17:16:21 +02:00
""" ================================= |
| This file is part of |
| staticoso |
| Just a simple Static Site Generator |
| |
| Licensed under the AGPLv3 license |
| Copyright ( C ) 2022 , OctoSpacc |
| == == == == == == == == == == == == == == == == = """
2022-08-22 17:20:35 +02:00
import html
2022-08-24 15:04:13 +02:00
import warnings
2022-08-29 17:50:14 +02:00
from Libs import htmlmin
2022-07-18 17:16:21 +02:00
from Libs . bs4 import BeautifulSoup
from Modules . Utils import *
2022-08-24 15:04:13 +02:00
# Suppress useless bs4 warnings
# The action must be exactly 'ignore' (padded values are rejected by the warnings module),
# and `message` is a regex matched against the START of the warning text, so it must not
# carry any leading whitespace.
warnings.filterwarnings('ignore', message='The input looks more like a filename than markup.')
2022-07-29 18:12:13 +02:00
def MkSoup(HTML):
    """Parse the HTML string into a BeautifulSoup tree with the 'html.parser' builder.

    The parser name has to be passed exactly as 'html.parser': bs4 looks the
    name up verbatim, so any surrounding whitespace makes the lookup fail.
    """
    return BeautifulSoup(HTML, 'html.parser')
def StripAttrs(HTML):
    """Drop every attribute from tags that have neither an href nor a src.

    A tag that carries href or src is left completely untouched (all of its
    attributes are kept, not only those two). Returns the resulting document
    serialized back to a string.
    """
    Soup = MkSoup(HTML)
    for Tag in Soup.find_all():
        # Attribute names are compared verbatim, so the keys must be unpadded.
        if 'href' not in Tag.attrs and 'src' not in Tag.attrs:
            Tag.attrs = {}
    return str(Soup)
2022-08-14 17:35:58 +02:00
def StripTags(HTML, ToStrip): # Remove desired tags from the HTML
    """Remove every tag whose name is in ToStrip, together with its contents.

    ToStrip is tested with `in`, so it is expected to be a collection of tag
    names. Returns the resulting document serialized back to a string.
    """
    Soup = MkSoup(HTML)
    for Tag in Soup.find_all():
        if Tag.name in ToStrip:
            # Replace with an empty string so nothing (not even a space) is left behind.
            Tag.replace_with('')
    return str(Soup)
2022-08-14 17:35:58 +02:00
2022-08-24 16:11:26 +02:00
def WriteImgAltAndTitle(HTML, AltToTitle, TitleToAlt): # Adds alt or title attr. to <img> which only have one of them
    """Mirror alt/title on <img> tags that only have one of the two.

    AltToTitle: when truthy, an <img> with alt but no title gets title = alt.
    TitleToAlt: when truthy, an <img> with title but no alt gets alt = title.
    Images that already have both, or neither, are left unchanged.
    Returns the resulting document serialized back to a string.
    """
    Soup = MkSoup(HTML)
    for Img in Soup.find_all('img'):
        HasAlt = 'alt' in Img.attrs
        HasTitle = 'title' in Img.attrs
        if AltToTitle and HasAlt and not HasTitle:
            Img.attrs['title'] = Img.attrs['alt']
        elif TitleToAlt and HasTitle and not HasAlt:
            Img.attrs['alt'] = Img.attrs['title']
    return str(Soup)
2022-07-29 18:12:13 +02:00
def AddToTagStartEnd(HTML, MatchStart, MatchEnd, AddStart, AddEnd): # This doesn't handle nested tags
    """Wrap the first MatchStart...MatchEnd span of HTML with AddStart / AddEnd.

    AddStart is inserted right before the first occurrence of MatchStart.
    AddEnd is inserted right after the first occurrence of MatchEnd that
    begins after that MatchStart begins. Either of AddStart / AddEnd may be
    empty or None, in which case that side is left alone.

    If MatchStart does not occur, HTML is returned unchanged. If MatchStart
    occurs but no MatchEnd follows it, only AddStart is inserted.
    Nested tags are not handled: the first MatchEnd after the start wins.
    """
    if not HTML or not (AddStart or AddEnd):
        return HTML
    StartPos = HTML.find(MatchStart)
    if StartPos < 0:
        return HTML
    # Compare positions explicitly (never by truthiness): a match at index 0 is valid.
    EndPos = HTML.find(MatchEnd, StartPos + 1)
    # Insert the tail first so StartPos still points at the right place afterwards,
    # and so the search is never confused by text that was just inserted.
    if AddEnd and EndPos >= 0:
        Cut = EndPos + len(MatchEnd)
        HTML = HTML[:Cut] + AddEnd + HTML[Cut:]
    if AddStart:
        HTML = HTML[:StartPos] + AddStart + HTML[StartPos:]
    return HTML
def SquareFnrefs(HTML): # Different combinations of formatting for Soup .prettify, .encode, .decode break different page elements, don't use this for now
    """Put square brackets around footnote references (experimental, see the note above).

    For each <sup> whose id starts with 'fnref:', the first <a> inside it is
    replaced by the string '[' + str(<that sup>) + ']'. A matching <sup> with
    no <a> inside is skipped instead of raising AttributeError.
    Returns the document as produced by prettify(formatter=None).
    """
    Soup = MkSoup(HTML)
    for Sup in Soup.find_all('sup'):
        if 'id' in Sup.attrs and Sup.attrs['id'].startswith('fnref:'):
            Link = Sup.find('a')
            if Link is not None:  # find() returns None when there is no <a>
                Link.replace_with(f'[{Sup}]')
    return str(Soup.prettify(formatter=None))
2022-08-29 17:50:14 +02:00
def DoMinifyHTML(HTML, KeepComments):
    """Return HTML minified by htmlmin.minify with this project's fixed option set.

    KeepComments: when truthy, comments are kept (remove_comments is passed
    as its negation). All other options are constants listed below.
    """
    Options = dict(
        remove_comments=not KeepComments,
        remove_empty_space=True,
        remove_all_empty_space=False,
        reduce_empty_attributes=True,
        reduce_boolean_attributes=True,
        remove_optional_attribute_quotes=True,
        convert_charrefs=True,
        keep_pre=True,
    )
    return htmlmin.minify(input=HTML, **Options)