From 2cc2bfc62afce2a3653c606f4c4f8b80ca19767a Mon Sep 17 00:00:00 2001 From: octospacc Date: Mon, 20 Jun 2022 16:16:41 +0200 Subject: [PATCH] Cleaned code a bit, added HTML minification --- .gitignore | 1 + Locale/en.json | 4 + Locale/it.json | 4 + README.md | 5 +- Source/Build.py | 172 +++---- Source/Libs/htmlmin/__init__.py | 30 ++ Source/Libs/htmlmin/command.py | 175 +++++++ Source/Libs/htmlmin/decorator.py | 64 +++ Source/Libs/htmlmin/escape.py | 204 +++++++++ Source/Libs/htmlmin/main.py | 193 ++++++++ Source/Libs/htmlmin/middleware.py | 92 ++++ Source/Libs/htmlmin/parser.py | 408 +++++++++++++++++ Source/Libs/htmlmin/python3html/__init__.py | 139 ++++++ Source/Libs/htmlmin/python3html/parser.py | 481 ++++++++++++++++++++ 14 files changed, 1892 insertions(+), 80 deletions(-) create mode 100644 .gitignore create mode 100644 Locale/en.json create mode 100644 Locale/it.json create mode 100644 Source/Libs/htmlmin/__init__.py create mode 100755 Source/Libs/htmlmin/command.py create mode 100644 Source/Libs/htmlmin/decorator.py create mode 100644 Source/Libs/htmlmin/escape.py create mode 100644 Source/Libs/htmlmin/main.py create mode 100644 Source/Libs/htmlmin/middleware.py create mode 100644 Source/Libs/htmlmin/parser.py create mode 100644 Source/Libs/htmlmin/python3html/__init__.py create mode 100644 Source/Libs/htmlmin/python3html/parser.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/Locale/en.json b/Locale/en.json new file mode 100644 index 0000000..933e803 --- /dev/null +++ b/Locale/en.json @@ -0,0 +1,4 @@ +{ + "CreatedOn": "Created on", + "EditedOn": "Edited on" +} diff --git a/Locale/it.json b/Locale/it.json new file mode 100644 index 0000000..15d2df1 --- /dev/null +++ b/Locale/it.json @@ -0,0 +1,4 @@ +{ + "CreatedOn": "Creato in data", + "EditedOn": "Modificato in data" +} diff --git a/README.md b/README.md index 12818df..e8dded5 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,11 @@ Feel free to experiment with all of this stuff! ## Dependencies - [Python >= 3.10.4](https://python.org) - [Python Markdown >= 3.3.7](https://pypi.org/project/Markdown) +- (Included) [htmlmin >= 0.1.12](https://pypi.org/project/htmlmin) - [pug-cli >= 1.0.0-alpha6](https://npmjs.com/package/pug-cli) ## Features roadmap +- [x] HTML minification - [ ] Open Graph support - [x] Custom categories for posts - [x] Custom static page parts programmable by context @@ -32,6 +34,5 @@ Feel free to experiment with all of this stuff! - [x] Generation of titles in right sidebar with clickable links - [x] Detections of titles in a page - [x] Custom static page parts by template -- [x] Pug support for pages -- [x] Markdown support for pages +- [x] Markdown + Pug support for pages - [x] First working version diff --git a/Source/Build.py b/Source/Build.py index 55398b8..954a828 100755 --- a/Source/Build.py +++ b/Source/Build.py @@ -8,12 +8,17 @@ | ================================= """ import argparse +import json +from Libs import htmlmin import os import shutil from ast import literal_eval from markdown import Markdown from pathlib import Path +Extensions = { + 'Pages': ('md', 'pug')} + def ReadFile(p): try: with open(p, 'r') as f: @@ -31,6 +36,15 @@ def WriteFile(p, c): print("Error writing file {}".format(p)) return False +def LoadLocale(Lang): + Lang = Lang + '.json' + Folder = os.path.dirname(os.path.abspath(__file__)) + '/../Locale/' + File = ReadFile(Folder + Lang) + if File: + return json.loads(File) + else: + return json.loads(ReadFile(Folder + 'en.json')) + def StripExt(Path): return ".".join(Path.split('.')[:-1]) @@ -44,20 +58,6 @@ def GetLevels(Path, Sub=0, AsNum=False): n = Path.count('/') return n if AsNum else '../' * n -def GetDeepest(Paths): - Deepest = 0 - for p in Paths: - l = GetLevels(p, True) - if l > Deepest: - Deepest = l - print(Deepest) - return Deepest - -def GetRelative(Path, Levels): - print(Path, Levels) - #return GetLevels(Path, Levels) - return '../' * Levels - def DashifyStr(s, Limit=32): Str, lc = '', Limit for c in s[:Limit].replace(' ','-').replace(' ','-'): @@ -97,14 +97,13 @@ def GetTitleIdLine(Line, Title, Type): NewLine += Line[Index+2:] return NewLine -def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, CurLevels, PathPrefix=''): - print(PathPrefix) +def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, PathPrefix=''): Title = GetTitle(Meta, Titles, Prefer) Link = False if Meta['Index'] == 'Unlinked' else True if Link: Title = '[{}]({})'.format( Title, - '{}{}.html'.format(PathPrefix, StripExt(File))) #(GetRelative(File, CurLevels), StripExt(File))) + '{}{}.html'.format(PathPrefix, StripExt(File))) if Meta['Type'] == 'Post' and Meta['CreatedOn']: Title = '[{}] {}'.format( Meta['CreatedOn'], @@ -193,20 +192,16 @@ def PugCompileList(Pages): Paths += '"{}" '.format(Path) os.system('pug -P {} > /dev/null'.format(Paths)) -def MakeContentHeader(Meta): +def MakeContentHeader(Meta, Locale): Header = '' if Meta['Type'] == 'Post': - # TODO: Fix the hardcoded italian - if Meta['CreatedOn'] and Meta['EditedOn']: - Header += "Creato in data {} \nModificato in data {} \n".format(Meta['CreatedOn'], Meta['EditedOn']) - elif Meta['CreatedOn'] and not Meta['EditedOn']: - Header += "Creato in data {} \n".format(Meta['CreatedOn']) - elif Meta['EditedOn'] and not Meta['CreatedOn']: - Header += "Modificato in data {} \n".format(Meta['EditedOn']) + if Meta['CreatedOn']: + Header += "{} {} \n".format(Locale['CreatedOn'], Meta['CreatedOn']) + if Meta['EditedOn']: + Header += "{} {} \n".format(Locale['EditedOn'], Meta['EditedOn']) return Markdown().convert(Header) -def PatchHTML(Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, FolderRoots, Categories): - print(PagePath) +def PatchHTML(Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, FolderRoots, Categories, Locale): HTMLTitles = FormatTitles(Titles) for Line in Template.splitlines(): Line = Line.lstrip().rstrip() @@ -233,7 +228,7 @@ def PatchHTML(Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList Template = Template.replace('[HTML:Page:Path]', PagePath) Template = Template.replace('[HTML:Page:Style]', Meta['Style']) Template = Template.replace('[HTML:Page:Content]', Content) - Template = Template.replace('[HTML:Page:ContentHeader]', MakeContentHeader(Meta)) + Template = Template.replace('[HTML:Page:ContentHeader]', MakeContentHeader(Meta, Locale)) Template = Template.replace('[HTML:Site:AbsoluteRoot]', SiteRoot) Template = Template.replace('[HTML:Site:RelativeRoot]', GetLevels(PagePath)) for i in FolderRoots: @@ -260,10 +255,8 @@ def OrderPages(Old): New.remove([]) return New -def GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, Type='Page', Category=None): - List = '' - ToPop = [] - LastParent = [] +def GetHTMLPagesList(Pages, SiteRoot, PathPrefix, Type='Page', Category=None): + List, ToPop, LastParent = '', [], [] IndexPages = Pages.copy() for e in IndexPages: if e[3]['Index'] == 'False' or e[3]['Index'] == 'None': @@ -271,8 +264,7 @@ def GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, Type='Page', Catego for i,e in enumerate(IndexPages): if e[3]['Type'] != Type: ToPop += [i] - ToPop.sort() - ToPop.reverse() + ToPop = RevSort(ToPop) for i in ToPop: IndexPages.pop(i) if Type == 'Page': @@ -287,83 +279,107 @@ def GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, Type='Page', Catego LastParent = CurParent Levels = '- ' * (n-1+i) if File[:-3].endswith('index.'): - Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, CurLevels, PathPrefix) + Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, PathPrefix) else: Title = CurParent[n-2+i] List += Levels + Title + '\n' if not (n > 1 and File[:-3].endswith('index.')): Levels = '- ' * n - Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, CurLevels, PathPrefix) + Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, PathPrefix) List += Levels + Title + '\n' return Markdown().convert(List) def DelTmp(): - for File in Path('public').rglob('*.pug'): - os.remove(File) - for File in Path('public').rglob('*.md'): - os.remove(File) + for Ext in Extensions['Pages']: + for File in Path('public').rglob('*.{}'.format(Ext)): + os.remove(File) -def MakeSite(TemplatesText, PartsText, ContextParts, ContextPartsText, SiteRoot, FolderRoots): - Files = [] - Pages = [] - Categories = {} - for File in Path('Pages').rglob('*.pug'): - Files += [FileToStr(File, 'Pages/')] - for File in Path('Pages').rglob('*.md'): - Files += [FileToStr(File, 'Pages/')] - Files.sort() - Files.reverse() +def RevSort(List): + List.sort() + List.reverse() + return List + +def DoMinify(HTML): + return htmlmin.minify( + input=HTML, + remove_comments=True, + remove_empty_space=True, + remove_all_empty_space=False, + reduce_empty_attributes=True, + reduce_boolean_attributes=True, + remove_optional_attribute_quotes=True, + convert_charrefs=True, + keep_pre=True) + +def MakeSite(TemplatesText, PartsText, ContextParts, ContextPartsText, SiteRoot, FolderRoots, Locale, Minify): + Files, Pages, Categories = [], [], {} + for Ext in Extensions['Pages']: + for File in Path('Pages').rglob('*.{}'.format(Ext)): + Files += [FileToStr(File, 'Pages/')] + Files = RevSort(Files) for File in Files: Content, Titles, Meta = PreProcessor('Pages/{}'.format(File), SiteRoot) Pages += [[File, Content, Titles, Meta]] for Category in Meta['Categories']: Categories.update({Category:''}) PugCompileList(Pages) - print(Files) for Category in Categories: - Categories[Category] = GetHTMLPagesList(Pages, SiteRoot, 0, '../../', 'Post', Category) + Categories[Category] = GetHTMLPagesList( + Pages=Pages, + SiteRoot=SiteRoot, + PathPrefix='../../', # This hardcodes paths, TODO make it somehow guess the path for every page containing the [HTML:Category] macro + Type='Post', + Category=Category) for File, Content, Titles, Meta in Pages: - CurLevels = GetLevels(File, 0, True) - PathPrefix = GetLevels(File) - print(PathPrefix) - print(File, CurLevels) - HTMLPagesList = GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, 'Page') + HTMLPagesList = GetHTMLPagesList( + Pages=Pages, + SiteRoot=SiteRoot, + PathPrefix=GetLevels(File), + Type='Page') PagePath = 'public/{}.html'.format(StripExt(File)) if File.endswith('.md'): Content = Markdown().convert(Content) elif File.endswith('.pug'): Content = ReadFile(PagePath) - Template = TemplatesText[Meta['Template']] - Template = Template.replace( - '[HTML:Site:AbsoluteRoot]', - SiteRoot) - Template = Template.replace( - '[HTML:Site:RelativeRoot]', - GetLevels(File)) - WriteFile( - PagePath, - PatchHTML( - Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList, - PagePath[len('public/'):], Content, Titles, Meta, SiteRoot, FolderRoots, Categories)) + HTML = PatchHTML( + Template=TemplatesText[Meta['Template']], + PartsText=PartsText, + ContextParts=ContextParts, + ContextPartsText=ContextPartsText, + HTMLPagesList=HTMLPagesList, + PagePath=PagePath[len('public/'):], + Content=Content, + Titles=Titles, + Meta=Meta, + SiteRoot=SiteRoot, + FolderRoots=FolderRoots, + Categories=Categories, + Locale=Locale) + if Minify != 'False' and Minify != 'None': + HTML = DoMinify(HTML) + WriteFile(PagePath, HTML) DelTmp() def Main(Args): ResetPublic() shutil.copytree('Pages', 'public') MakeSite( - LoadFromDir('Templates', '*.html'), - LoadFromDir('Parts', '*.html'), - literal_eval(Args.ContextParts) if Args.ContextParts else {}, - LoadFromDir('ContextParts', '*.html'), - Args.SiteRoot if Args.SiteRoot else '/', - literal_eval(Args.FolderRoots) if Args.FolderRoots else {}) + TemplatesText=LoadFromDir('Templates', '*.html'), + PartsText=LoadFromDir('Parts', '*.html'), + ContextParts=literal_eval(Args.ContextParts) if Args.ContextParts else {}, + ContextPartsText=LoadFromDir('ContextParts', '*.html'), + SiteRoot=Args.SiteRoot if Args.SiteRoot else '/', + FolderRoots=literal_eval(Args.FolderRoots) if Args.FolderRoots else {}, + Locale=LoadLocale(Args.SiteLang if Args.SiteLang else 'en'), + Minify=Args.Minify if Args.Minify else 'None') os.system("cp -R Assets/* public/") if __name__ == '__main__': Parser = argparse.ArgumentParser() + Parser.add_argument('--SiteLang', type=str) Parser.add_argument('--SiteRoot', type=str) Parser.add_argument('--FolderRoots', type=str) Parser.add_argument('--ContextParts', type=str) - Args = Parser.parse_args() - - Main(Args) + Parser.add_argument('--Minify', type=str) + Main( + Args=Parser.parse_args()) diff --git a/Source/Libs/htmlmin/__init__.py b/Source/Libs/htmlmin/__init__.py new file mode 100644 index 0000000..b6d4c4c --- /dev/null +++ b/Source/Libs/htmlmin/__init__.py @@ -0,0 +1,30 @@ +""" +Copyright (c) 2013, Dave Mankoff +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Dave Mankoff nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from .main import minify, Minifier + +__version__ = '0.1.12' diff --git a/Source/Libs/htmlmin/command.py b/Source/Libs/htmlmin/command.py new file mode 100755 index 0000000..ba3a427 --- /dev/null +++ b/Source/Libs/htmlmin/command.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +""" +Copyright (c) 2013, Dave Mankoff +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Dave Mankoff nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +import argparse +import codecs +import locale +import io +import sys + +#import htmlmin +from . import Minifier + +parser = argparse.ArgumentParser( + description='Minify HTML', + formatter_class=argparse.RawTextHelpFormatter + ) + +parser.add_argument('input_file', + nargs='?', + metavar='INPUT', + help='File path to html file to minify. Defaults to stdin.', + ) + +parser.add_argument('output_file', + nargs='?', + metavar='OUTPUT', + help="File path to output to. Defaults to stdout.", + ) + +parser.add_argument('-c', '--remove-comments', + help=( +'''When set, comments will be removed. They can be kept on an individual basis +by starting them with a '!': . The '!' will be removed from +the final output. If you want a '!' as the leading character of your comment, +put two of them: . + +'''), + action='store_true') + +parser.add_argument('-s', '--remove-empty-space', + help=( +'''When set, this removes empty space betwen tags in certain cases. +Specifically, it will remove empty space if and only if there a newline +character occurs within the space. Thus, code like +'x y' will be left alone, but code such as +' ... + + + ...' +will become '......'. Note that this CAN break your +html if you spread two inline tags over two lines. Use with caution. + +'''), + action='store_true') + +parser.add_argument('--remove-all-empty-space', + help=( +'''When set, this removes ALL empty space betwen tags. WARNING: this can and +likely will cause unintended consequences. For instance, 'X Y' +will become 'XY'. Putting whitespace along with other text will +avoid this problem. Only use if you are confident in the result. Whitespace is +not removed from inside of tags, thus ' ' will be left alone. + +'''), + action='store_true') + +parser.add_argument('--keep-optional-attribute-quotes', + help=( +'''When set, this keeps all attribute quotes, even if they are optional. + +'''), + action='store_true') + +parser.add_argument('-H', '--in-head', + help=( +'''If you are parsing only a fragment of HTML, and the fragment occurs in the +head of the document, setting this will remove some extra whitespace. + +'''), + action='store_true') + +parser.add_argument('-k', '--keep-pre-attr', + help=( +'''HTMLMin supports the propietary attribute 'pre' that can be added to elements +to prevent minification. This attribute is removed by default. Set this flag to +keep the 'pre' attributes in place. + +'''), + action='store_true') + +parser.add_argument('-a', '--pre-attr', + help=( +'''The attribute htmlmin looks for to find blocks of HTML that it should not +minify. This attribute will be removed from the HTML unless '-k' is +specified. Defaults to 'pre'. + +'''), + default='pre') + + +parser.add_argument('-p', '--pre-tags', + metavar='TAG', + help=( +'''By default, the contents of 'pre', and 'textarea' tags are left unminified. +You can specify different tags using the --pre-tags option. 'script' and 'style' +tags are always left unmininfied. + +'''), + nargs='*', + default=['pre', 'textarea']) +parser.add_argument('-e', '--encoding', + help=("Encoding to read and write with. Default 'utf-8'." + " When reading from stdin, attempts to use the system's" + " encoding before defaulting to utf-8.\n\n"), + default=None, + ) + +def main(): + args = parser.parse_args() + minifier = Minifier( + remove_comments=args.remove_comments, + remove_empty_space=args.remove_empty_space, + remove_optional_attribute_quotes=not args.keep_optional_attribute_quotes, + pre_tags=args.pre_tags, + keep_pre=args.keep_pre_attr, + pre_attr=args.pre_attr, + ) + default_encoding = args.encoding or 'utf-8' + + if args.input_file: + inp = codecs.open(args.input_file, encoding=default_encoding) + else: + encoding = args.encoding or sys.stdin.encoding \ + or locale.getpreferredencoding() or default_encoding + inp = io.open(sys.stdin.fileno(), encoding=encoding) + + for line in inp.readlines(): + minifier.input(line) + + if args.output_file: + codecs.open( + args.output_file, 'w', encoding=default_encoding).write(minifier.output) + else: + encoding = args.encoding or sys.stdout.encoding \ + or locale.getpreferredencoding() or default_encoding + io.open(sys.stdout.fileno(), 'w', encoding=encoding).write(minifier.output) + +if __name__ == '__main__': + main() + diff --git a/Source/Libs/htmlmin/decorator.py b/Source/Libs/htmlmin/decorator.py new file mode 100644 index 0000000..6e26597 --- /dev/null +++ b/Source/Libs/htmlmin/decorator.py @@ -0,0 +1,64 @@ +""" +Copyright (c) 2013, Dave Mankoff +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Dave Mankoff nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from .main import Minifier + +def htmlmin(*args, **kwargs): + """Minifies HTML that is returned by a function. + + A simple decorator that minifies the HTML output of any function that it + decorates. It supports all the same options that :class:`htmlmin.minify` has. + With no options, it uses ``minify``'s default settings:: + + @htmlmin + def foobar(): + return ' minify me! ' + + or:: + + @htmlmin(remove_comments=True) + def foobar(): + return ' minify me! ' + """ + def _decorator(fn): + minify = Minifier(**kwargs).minify + def wrapper(*a, **kw): + return minify(fn(*a, **kw)) + return wrapper + + if len(args) == 1: + if callable(args[0]) and not kwargs: + return _decorator(args[0]) + else: + raise RuntimeError( + 'htmlmin decorator does accept positional arguments') + elif len(args) > 1: + raise RuntimeError( + 'htmlmin decorator does accept positional arguments') + else: + return _decorator + diff --git a/Source/Libs/htmlmin/escape.py b/Source/Libs/htmlmin/escape.py new file mode 100644 index 0000000..5d0e2d4 --- /dev/null +++ b/Source/Libs/htmlmin/escape.py @@ -0,0 +1,204 @@ +""" +Copyright (c) 2015, Dave Mankoff +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Dave Mankoff nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +import re + +try: + from html import escape +except ImportError: + from cgi import escape + +import re + +NO_QUOTES = 0 +SINGLE_QUOTE = 1 +DOUBLE_QUOTE = 2 + +UPPER_A = ord('A') +UPPER_F = ord('F') +UPPER_Z = ord('Z') +LOWER_A = ord('a') +LOWER_F = ord('f') +LOWER_Z = ord('z') +ZERO = ord('0') +NINE = ord('9') + + +# https://www.w3.org/TR/html5/syntax.html#attributes-0 +CHARS_TO_QUOTE_RE = re.compile(u'[\x20\x09\x0a\x0c\x0d=><`]') + +def escape_tag(val): + return escape(val) + +def escape_attr_name(val): + return escape(val) + +def escape_attr_value(val, double_quote=False): + val = escape_ambiguous_ampersand(val) + has_html_tag = '<' in val or '>' in val + if double_quote: + return (val.replace('"', '"'), DOUBLE_QUOTE) + + double_quote_count = 0 + single_quote_count = 0 + for ch in val: + if ch == '"': + double_quote_count += 1 + elif ch == "'": + single_quote_count += 1 + if double_quote_count > single_quote_count: + return (val.replace("'", '''), SINGLE_QUOTE) + elif single_quote_count: + return (val.replace('"', '"'), DOUBLE_QUOTE) + + if not val or CHARS_TO_QUOTE_RE.search(val): + return (val, DOUBLE_QUOTE) + return (val, NO_QUOTES) + +def escape_ambiguous_ampersand(val): + # TODO: this function could probably me made a lot faster. + if not '&' in val: # short circuit for speed + return val + + state = 0 + result = [] + amp_buff = [] + for c in val: + if state == 0: # beginning + if c == '&': + state = 1 + else: + result.append(c) + elif state == 1: # ampersand + ord_c = ord(c) + if (UPPER_A <= ord_c <= UPPER_Z or + LOWER_A <= ord_c <= LOWER_Z or + ZERO <= ord_c <= NINE): + amp_buff.append(c) # TODO: use "name character references" section + # https://html.spec.whatwg.org/multipage/syntax.html#named-character-references + elif c == '#': + state = 2 + elif c == ';': + if amp_buff: + result.append('&') + result.extend(amp_buff) + result.append(';') + else: + result.append('&;') + state = 0 + amp_buff = [] + elif c == '&': + if amp_buff: + result.append('&') + result.extend(amp_buff) + else: + result.append('&') + amp_buff = [] + else: + result.append('&') + result.extend(amp_buff) + result.append(c) + state = 0 + amp_buff = [] + elif state == 2: # numeric character reference + ord_c = ord(c) + if c == 'x' or c == 'X': + state = 3 + elif ZERO <= ord_c <= NINE: + amp_buff.append(c) + elif c == ';': + if amp_buff: + result.append('&#') + result.extend(amp_buff) + result.append(';') + else: + result.append('&#;') + state = 0 + amp_buff = [] + elif c == '&': + if amp_buff: + result.append('&#') + result.extend(amp_buff) + else: + result.append('&#') + state = 1 + amp_buff = [] + else: + if amp_buff: + result.append('&#') + result.extend(amp_buff) + result.append(c) + else: + result.append('&#') + result.append(c) + state = 0 + amp_buff = [] + elif state == 3: # hex character reference + ord_c = ord(c) + if (UPPER_A <= ord_c <= UPPER_F or + LOWER_A <= ord_c <= LOWER_F or + ZERO <= ord_c <= NINE): + amp_buff.append(c) + elif c == ';': + if amp_buff: + result.append('&#x') + result.extend(amp_buff) + result.append(';') + else: + result.append('&#x;') + state = 0 + amp_buff = [] + elif c == '&': + if amp_buff: + result.append('&#x') + result.extend(amp_buff) + else: + result.append('&#x') + state = 1 + amp_buff = [] + else: + if amp_buff: + result.append('&#x') + result.extend(amp_buff) + result.append(c) + else: + result.append('&#x') + result.append(c) + state = 0 + amp_buff = [] + + if state == 1: + result.append('&') + result.extend(amp_buff) + elif state == 2: + result.append('&#') + result.extend(amp_buff) + elif state == 3: + result.append('&#x') + result.extend(amp_buff) + + return ''.join(result) diff --git a/Source/Libs/htmlmin/main.py b/Source/Libs/htmlmin/main.py new file mode 100644 index 0000000..6d21c06 --- /dev/null +++ b/Source/Libs/htmlmin/main.py @@ -0,0 +1,193 @@ +""" +Copyright (c) 2013, Dave Mankoff +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Dave Mankoff nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +import cgi + +from . import parser + +def minify(input, + remove_comments=False, + remove_empty_space=False, + remove_all_empty_space=False, + reduce_empty_attributes=True, + reduce_boolean_attributes=False, + remove_optional_attribute_quotes=True, + convert_charrefs=True, + keep_pre=False, + pre_tags=parser.PRE_TAGS, + pre_attr='pre', + cls=parser.HTMLMinParser): + """Minifies HTML in one shot. + + :param input: A string containing the HTML to be minified. + :param remove_comments: Remove comments found in HTML. Individual comments can + be maintained by putting a ``!`` as the first character inside the comment. + Thus:: + + + + Will become simply:: + + + + The added exclamation is removed. + :param remove_empty_space: Remove empty space found in HTML between an opening + and a closing tag and when it contains a newline or carriage return. If + whitespace is found that is only spaces and/or tabs, it will be turned into + a single space. Be careful, this can have unintended consequences. + :param remove_all_empty_space: A more extreme version of + ``remove_empty_space``, this removes all empty whitespace found between + tags. This is almost guaranteed to break your HTML unless you are very + careful. + :param reduce_boolean_attributes: Where allowed by the HTML5 specification, + attributes such as 'disabled' and 'readonly' will have their value removed, + so 'disabled="true"' will simply become 'disabled'. This is generally a + good option to turn on except when JavaScript relies on the values. + :param remove_optional_attribute_quotes: When True, optional quotes around + attributes are removed. When False, all attribute quotes are left intact. + Defaults to True. + :param conver_charrefs: Decode character references such as & and . + to their single charater values where safe. This currently only applies to + attributes. Data content between tags will be left encoded. + :param keep_pre: By default, htmlmin uses the special attribute ``pre`` to + allow you to demarcate areas of HTML that should not be minified. It removes + this attribute as it finds it. Setting this value to ``True`` tells htmlmin + to leave the attribute in the output. + :param pre_tags: A list of tag names that should never be minified. You are + free to change this list as you see fit, but you will probably want to + include ``pre`` and ``textarea`` if you make any changes to the list. Note + that ``