2022-07-18 17:16:21 +02:00
""" ================================= |
| This file is part of |
| staticoso |
| Just a simple Static Site Generator |
| |
| Licensed under the AGPLv3 license |
| Copyright ( C ) 2022 , OctoSpacc |
| == == == == == == == == == == == == == == == == = """
2022-08-22 17:20:35 +02:00
import html
2022-08-24 15:04:13 +02:00
import warnings
2022-08-29 17:50:14 +02:00
from Libs import htmlmin
2022-07-18 17:16:21 +02:00
from Libs . bs4 import BeautifulSoup
from Modules . Utils import *
2022-08-24 15:04:13 +02:00
# bs4 emits this warning when the markup it is given resembles a file name;
# it is noise for this module, so that one message is filtered out.
warnings.filterwarnings(
    action='ignore',
    message='The input looks more like a filename than markup.',
)
2022-07-29 18:12:13 +02:00
def MkSoup(HTML):
    """Parse `HTML` into a BeautifulSoup tree using the 'html.parser' builder."""
    Parser = 'html.parser'
    return BeautifulSoup(HTML, features=Parser)
def StripAttrs(HTML):
    """Clear the attributes of every tag that has neither `href` nor `src`.

    A tag that carries `href` or `src` keeps ALL of its attributes, not just
    those two. Returns the document re-serialized with `str()`.
    """
    Document = MkSoup(HTML)
    for Node in Document.find_all():
        IsLinking = 'href' in Node.attrs or 'src' in Node.attrs
        if not IsLinking:
            Node.attrs = {}
    return str(Document)
2022-08-14 17:35:58 +02:00
def StripTags(HTML, ToStrip): # Remove desired tags from the HTML
    """Replace every tag whose name is in `ToStrip` with an empty string.

    `ToStrip` is only used through the `in` operator, so any container of tag
    names works. Returns the document re-serialized with `str()`.
    """
    Document = MkSoup(HTML)
    Unwanted = [Node for Node in Document.find_all() if Node.name in ToStrip]
    for Node in Unwanted:
        Node.replace_with('')
    return str(Document)
2022-08-14 17:35:58 +02:00
2022-09-03 17:48:39 +02:00
def DoHTMLFixPre(HTML):
    """Drop the first newline inside <pre> elements whose first line ends with '>'.

    The input is returned untouched (not even re-serialized) when it contains
    no '<pre' marker. Otherwise each <pre> is serialized, and if its first
    line (whitespace-stripped) ends with '>', the first newline of that
    serialization is removed and the element is swapped for the re-parsed
    result. Presumably this avoids an empty leading line in the block; confirm
    against the callers.
    """
    if not any(Marker in HTML for Marker in ('<pre>', '<pre')):
        return HTML
    Document = MkSoup(HTML)
    for Block in Document.find_all('pre'):
        Markup = str(Block)
        OpeningLine = Markup.splitlines()[0].strip()
        if not OpeningLine.endswith('>'):
            continue
        # Only the first newline is removed; the rest of the block is kept.
        Reparsed = MkSoup(Markup.replace('\n', '', 1))
        Block.replace_with(Reparsed.pre)
    return str(Document)
2022-08-24 16:11:26 +02:00
def WriteImgAltAndTitle(HTML, AltToTitle, TitleToAlt): # Adds alt or title attr. to <img> which only have one of them
    """Mirror `alt` into `title` (or the reverse) on <img> tags that have only one.

    AltToTitle: when truthy, images with `alt` but no `title` get `title` = `alt`.
    TitleToAlt: when truthy, images with `title` but no `alt` get `alt` = `title`.
    Images that have both or neither attribute are left as they are.
    Returns the document re-serialized with `str()`.
    """
    Document = MkSoup(HTML)
    for Image in Document.find_all('img'):
        Attrs = Image.attrs
        HasAlt, HasTitle = 'alt' in Attrs, 'title' in Attrs
        if AltToTitle and HasAlt and not HasTitle:
            Attrs['title'] = Attrs['alt']
        elif TitleToAlt and HasTitle and not HasAlt:
            Attrs['alt'] = Attrs['title']
    return str(Document)
2022-07-29 18:12:13 +02:00
def AddToTagStartEnd(HTML, MatchStart, MatchEnd, AddStart, AddEnd): # This doesn't handle nested tags
    """Insert text before each `MatchStart` and after the `MatchEnd` that closes it.

    The input is scanned left to right, without overlapping matches:
    - every occurrence of `MatchStart` gets `AddStart` inserted right before it
      and marks a tag as open;
    - while a tag is open, the next occurrence of `MatchEnd` gets `AddEnd`
      inserted right after it and closes the tag.
    A `MatchEnd` with no open tag before it is left alone. Nesting is not
    tracked: a second `MatchStart` before the end marker is simply treated as
    another start, and only one end marker is decorated afterwards.

    HTML: text to process; returned unchanged when empty.
    MatchStart: literal start marker; when empty nothing is done.
    MatchEnd: literal end marker; when empty no end is ever decorated.
    AddStart / AddEnd: text to insert; a falsy value means "insert nothing"
        (the tag is still opened/closed).
    Returns the new string.
    """
    if not HTML or not MatchStart:
        return HTML
    Parts = []
    Cursor = 0 # Everything before this index has already been copied to Parts
    InTag = False
    # Scanning the untouched input (instead of a string that grows while it is
    # being indexed) keeps inserted text from being re-matched and guarantees
    # the tail of the document is examined too.
    while True:
        StartAt = HTML.find(MatchStart, Cursor)
        EndAt = HTML.find(MatchEnd, Cursor) if (InTag and MatchEnd) else -1
        if StartAt < 0 and EndAt < 0:
            break
        if EndAt < 0 or 0 <= StartAt <= EndAt:
            # A start marker comes first (a tie goes to the start marker).
            Parts.append(HTML[Cursor:StartAt])
            if AddStart:
                Parts.append(AddStart)
            Parts.append(MatchStart)
            Cursor = StartAt + len(MatchStart)
            InTag = True # Tracked as a flag, so a start at index 0 counts
        else:
            Stop = EndAt + len(MatchEnd)
            Parts.append(HTML[Cursor:Stop])
            if AddEnd:
                Parts.append(AddEnd)
            Cursor = Stop
            InTag = False
    Parts.append(HTML[Cursor:])
    return ''.join(Parts)
def SquareFnrefs(HTML): # Different combinations of formatting for Soup .prettify, .encode, .decode break different page elements, don't use this for now
    """Wrap footnote references in square brackets (flagged above as not ready for use).

    For every <sup> whose `id` starts with 'fnref:', the first <a> inside it is
    replaced by the text '[' + the serialized <sup> + ']'. A matching <sup>
    that contains no <a> is skipped instead of raising AttributeError.
    Returns the document through `prettify(formatter=None)`, so the output is
    re-indented and not entity-escaped.
    """
    Soup = MkSoup(HTML)
    for t in Soup.find_all('sup'):
        if not ('id' in t.attrs and t.attrs['id'].startswith('fnref:')):
            continue
        Anchor = t.find('a')
        if Anchor is None:
            # find() returns None when there is no match; nothing to replace.
            continue
        Anchor.replace_with(f'[{t}]')
    return str(Soup.prettify(formatter=None))
2022-08-29 17:50:14 +02:00
def DoMinifyHTML(HTML, KeepComments):
    """Minify `HTML` with htmlmin using this project's fixed option set.

    KeepComments: when truthy, comments are kept (`remove_comments` is its
    negation). All other options are constant; see the dict below.
    Returns whatever `htmlmin.minify` returns.
    """
    Options = {
        'remove_comments': not KeepComments,
        'remove_empty_space': True,
        'remove_all_empty_space': False,
        'reduce_empty_attributes': True,
        'reduce_boolean_attributes': True,
        'remove_optional_attribute_quotes': True,
        'convert_charrefs': True,
        'keep_pre': True,
    }
    return htmlmin.minify(input=HTML, **Options)