Cleaned code a bit, added HTML minification

This commit is contained in:
octospacc 2022-06-20 16:16:41 +02:00
parent 8dee196213
commit 2cc2bfc62a
14 changed files with 1892 additions and 80 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.pyc

4
Locale/en.json Normal file
View File

@ -0,0 +1,4 @@
{
"CreatedOn": "Created on",
"EditedOn": "Edited on"
}

4
Locale/it.json Normal file
View File

@ -0,0 +1,4 @@
{
"CreatedOn": "Creato in data",
"EditedOn": "Modificato in data"
}

View File

@ -12,9 +12,11 @@ Feel free to experiment with all of this stuff!
## Dependencies
- [Python >= 3.10.4](https://python.org)
- [Python Markdown >= 3.3.7](https://pypi.org/project/Markdown)
- (Included) [htmlmin >= 0.1.12](https://pypi.org/project/htmlmin)
- [pug-cli >= 1.0.0-alpha6](https://npmjs.com/package/pug-cli)
## Features roadmap
- [x] HTML minification
- [ ] Open Graph support
- [x] Custom categories for posts
- [x] Custom static page parts programmable by context
@ -32,6 +34,5 @@ Feel free to experiment with all of this stuff!
- [x] Generation of titles in right sidebar with clickable links
- [x] Detection of titles in a page
- [x] Custom static page parts by template
- [x] Pug support for pages
- [x] Markdown support for pages
- [x] Markdown + Pug support for pages
- [x] First working version

View File

@ -8,12 +8,17 @@
| ================================= """
import argparse
import json
from Libs import htmlmin
import os
import shutil
from ast import literal_eval
from markdown import Markdown
from pathlib import Path
# Source-file extensions the generator recognizes, grouped by role.
# 'Pages' lists the formats accepted for page content (Markdown and Pug).
Extensions = {
    'Pages': ('md', 'pug')}
def ReadFile(p):
try:
with open(p, 'r') as f:
@ -31,6 +36,15 @@ def WriteFile(p, c):
print("Error writing file {}".format(p))
return False
def LoadLocale(Lang):
    """Load the UI-string table for language code Lang (e.g. 'en', 'it').

    Reads Locale/<Lang>.json relative to this script; falls back to the
    English table when the requested one is missing or unreadable.
    Returns the parsed dict of locale strings.
    """
    # Idiom fix: build paths with os.path.join instead of string concatenation.
    Folder = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), '..', 'Locale')
    File = ReadFile(os.path.join(Folder, Lang + '.json'))
    if File:
        return json.loads(File)
    # Fallback: English locale is assumed to always ship with the program.
    return json.loads(ReadFile(os.path.join(Folder, 'en.json')))
def StripExt(Path):
    """Return Path with its final extension removed.

    'a/b.tar.gz' -> 'a/b.tar'; a name with no dot yields ''.
    """
    # rpartition splits on the last '.'; its first element is everything
    # before it, which is '' when no dot exists (same as the join/split form).
    return Path.rpartition('.')[0]
@ -44,20 +58,6 @@ def GetLevels(Path, Sub=0, AsNum=False):
n = Path.count('/')
return n if AsNum else '../' * n
def GetDeepest(Paths):
    """Return the greatest directory depth (number of '/') among Paths.

    Returns 0 for an empty iterable.
    """
    # Bug fix: the original called GetLevels(p, True), which bound True to the
    # unused Sub parameter instead of AsNum, so GetLevels returned a '../'
    # string and the `>` comparison against an int raised TypeError.
    # Also removed the leftover debug print.
    return max((GetLevels(p, AsNum=True) for p in Paths), default=0)
def GetRelative(Path, Levels):
    """Return a relative-path prefix climbing up Levels directories.

    Path is currently unused; it is kept for interface compatibility with
    existing callers. Removed a leftover debug print and commented-out code.
    """
    return '../' * Levels
def DashifyStr(s, Limit=32):
Str, lc = '', Limit
for c in s[:Limit].replace(' ','-').replace(' ','-'):
@ -97,14 +97,13 @@ def GetTitleIdLine(Line, Title, Type):
NewLine += Line[Index+2:]
return NewLine
def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, CurLevels, PathPrefix=''):
print(PathPrefix)
def MakeListTitle(File, Meta, Titles, Prefer, SiteRoot, PathPrefix=''):
Title = GetTitle(Meta, Titles, Prefer)
Link = False if Meta['Index'] == 'Unlinked' else True
if Link:
Title = '[{}]({})'.format(
Title,
'{}{}.html'.format(PathPrefix, StripExt(File))) #(GetRelative(File, CurLevels), StripExt(File)))
'{}{}.html'.format(PathPrefix, StripExt(File)))
if Meta['Type'] == 'Post' and Meta['CreatedOn']:
Title = '[{}] {}'.format(
Meta['CreatedOn'],
@ -193,20 +192,16 @@ def PugCompileList(Pages):
Paths += '"{}" '.format(Path)
os.system('pug -P {} > /dev/null'.format(Paths))
def MakeContentHeader(Meta, Locale):
    """Build the created/edited date header of a post as HTML.

    Uses the Locale dict ('CreatedOn'/'EditedOn' keys) instead of hardcoded
    Italian strings; each present date becomes its own Markdown line.
    Returns the header converted to HTML (empty string for non-posts).
    """
    Header = ''
    if Meta['Type'] == 'Post':
        # Each date is emitted independently, so any combination of
        # CreatedOn/EditedOn works without special-casing.
        if Meta['CreatedOn']:
            Header += "{} {} \n".format(Locale['CreatedOn'], Meta['CreatedOn'])
        if Meta['EditedOn']:
            Header += "{} {} \n".format(Locale['EditedOn'], Meta['EditedOn'])
    return Markdown().convert(Header)
def PatchHTML(Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, FolderRoots, Categories):
print(PagePath)
def PatchHTML(Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList, PagePath, Content, Titles, Meta, SiteRoot, FolderRoots, Categories, Locale):
HTMLTitles = FormatTitles(Titles)
for Line in Template.splitlines():
Line = Line.lstrip().rstrip()
@ -233,7 +228,7 @@ def PatchHTML(Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList
Template = Template.replace('[HTML:Page:Path]', PagePath)
Template = Template.replace('[HTML:Page:Style]', Meta['Style'])
Template = Template.replace('[HTML:Page:Content]', Content)
Template = Template.replace('[HTML:Page:ContentHeader]', MakeContentHeader(Meta))
Template = Template.replace('[HTML:Page:ContentHeader]', MakeContentHeader(Meta, Locale))
Template = Template.replace('[HTML:Site:AbsoluteRoot]', SiteRoot)
Template = Template.replace('[HTML:Site:RelativeRoot]', GetLevels(PagePath))
for i in FolderRoots:
@ -260,10 +255,8 @@ def OrderPages(Old):
New.remove([])
return New
def GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, Type='Page', Category=None):
List = ''
ToPop = []
LastParent = []
def GetHTMLPagesList(Pages, SiteRoot, PathPrefix, Type='Page', Category=None):
List, ToPop, LastParent = '', [], []
IndexPages = Pages.copy()
for e in IndexPages:
if e[3]['Index'] == 'False' or e[3]['Index'] == 'None':
@ -271,8 +264,7 @@ def GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, Type='Page', Catego
for i,e in enumerate(IndexPages):
if e[3]['Type'] != Type:
ToPop += [i]
ToPop.sort()
ToPop.reverse()
ToPop = RevSort(ToPop)
for i in ToPop:
IndexPages.pop(i)
if Type == 'Page':
@ -287,83 +279,107 @@ def GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, Type='Page', Catego
LastParent = CurParent
Levels = '- ' * (n-1+i)
if File[:-3].endswith('index.'):
Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, CurLevels, PathPrefix)
Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, PathPrefix)
else:
Title = CurParent[n-2+i]
List += Levels + Title + '\n'
if not (n > 1 and File[:-3].endswith('index.')):
Levels = '- ' * n
Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, CurLevels, PathPrefix)
Title = MakeListTitle(File, Meta, Titles, 'HTMLTitle', SiteRoot, PathPrefix)
List += Levels + Title + '\n'
return Markdown().convert(List)
def DelTmp():
    """Delete leftover page-source files (.md, .pug) from the public folder.

    Iterates the extensions registered in Extensions['Pages'] instead of
    hardcoding one loop per format, so new formats are cleaned automatically.
    """
    for Ext in Extensions['Pages']:
        for File in Path('public').rglob('*.{}'.format(Ext)):
            os.remove(File)
def MakeSite(TemplatesText, PartsText, ContextParts, ContextPartsText, SiteRoot, FolderRoots):
Files = []
Pages = []
Categories = {}
for File in Path('Pages').rglob('*.pug'):
Files += [FileToStr(File, 'Pages/')]
for File in Path('Pages').rglob('*.md'):
Files += [FileToStr(File, 'Pages/')]
Files.sort()
Files.reverse()
def RevSort(List):
    """Sort List in place in descending order and return it."""
    # Single descending sort replaces the sort-then-reverse pair.
    List.sort(reverse=True)
    return List
def DoMinify(HTML):
    """Minify an HTML document with the project's fixed settings.

    Comments and newline-spanning inter-tag whitespace are dropped, empty and
    boolean attributes are reduced, optional attribute quotes are removed,
    character references are converted, and <pre> content is preserved.
    """
    Options = {
        'remove_comments': True,
        'remove_empty_space': True,
        'remove_all_empty_space': False,
        'reduce_empty_attributes': True,
        'reduce_boolean_attributes': True,
        'remove_optional_attribute_quotes': True,
        'convert_charrefs': True,
        'keep_pre': True}
    return htmlmin.minify(HTML, **Options)
def MakeSite(TemplatesText, PartsText, ContextParts, ContextPartsText, SiteRoot, FolderRoots, Locale, Minify):
Files, Pages, Categories = [], [], {}
for Ext in Extensions['Pages']:
for File in Path('Pages').rglob('*.{}'.format(Ext)):
Files += [FileToStr(File, 'Pages/')]
Files = RevSort(Files)
for File in Files:
Content, Titles, Meta = PreProcessor('Pages/{}'.format(File), SiteRoot)
Pages += [[File, Content, Titles, Meta]]
for Category in Meta['Categories']:
Categories.update({Category:''})
PugCompileList(Pages)
print(Files)
for Category in Categories:
Categories[Category] = GetHTMLPagesList(Pages, SiteRoot, 0, '../../', 'Post', Category)
Categories[Category] = GetHTMLPagesList(
Pages=Pages,
SiteRoot=SiteRoot,
PathPrefix='../../', # This hardcodes paths, TODO make it somehow guess the path for every page containing the [HTML:Category] macro
Type='Post',
Category=Category)
for File, Content, Titles, Meta in Pages:
CurLevels = GetLevels(File, 0, True)
PathPrefix = GetLevels(File)
print(PathPrefix)
print(File, CurLevels)
HTMLPagesList = GetHTMLPagesList(Pages, SiteRoot, CurLevels, PathPrefix, 'Page')
HTMLPagesList = GetHTMLPagesList(
Pages=Pages,
SiteRoot=SiteRoot,
PathPrefix=GetLevels(File),
Type='Page')
PagePath = 'public/{}.html'.format(StripExt(File))
if File.endswith('.md'):
Content = Markdown().convert(Content)
elif File.endswith('.pug'):
Content = ReadFile(PagePath)
Template = TemplatesText[Meta['Template']]
Template = Template.replace(
'[HTML:Site:AbsoluteRoot]',
SiteRoot)
Template = Template.replace(
'[HTML:Site:RelativeRoot]',
GetLevels(File))
WriteFile(
PagePath,
PatchHTML(
Template, PartsText, ContextParts, ContextPartsText, HTMLPagesList,
PagePath[len('public/'):], Content, Titles, Meta, SiteRoot, FolderRoots, Categories))
HTML = PatchHTML(
Template=TemplatesText[Meta['Template']],
PartsText=PartsText,
ContextParts=ContextParts,
ContextPartsText=ContextPartsText,
HTMLPagesList=HTMLPagesList,
PagePath=PagePath[len('public/'):],
Content=Content,
Titles=Titles,
Meta=Meta,
SiteRoot=SiteRoot,
FolderRoots=FolderRoots,
Categories=Categories,
Locale=Locale)
if Minify != 'False' and Minify != 'None':
HTML = DoMinify(HTML)
WriteFile(PagePath, HTML)
DelTmp()
def Main(Args):
ResetPublic()
shutil.copytree('Pages', 'public')
MakeSite(
LoadFromDir('Templates', '*.html'),
LoadFromDir('Parts', '*.html'),
literal_eval(Args.ContextParts) if Args.ContextParts else {},
LoadFromDir('ContextParts', '*.html'),
Args.SiteRoot if Args.SiteRoot else '/',
literal_eval(Args.FolderRoots) if Args.FolderRoots else {})
TemplatesText=LoadFromDir('Templates', '*.html'),
PartsText=LoadFromDir('Parts', '*.html'),
ContextParts=literal_eval(Args.ContextParts) if Args.ContextParts else {},
ContextPartsText=LoadFromDir('ContextParts', '*.html'),
SiteRoot=Args.SiteRoot if Args.SiteRoot else '/',
FolderRoots=literal_eval(Args.FolderRoots) if Args.FolderRoots else {},
Locale=LoadLocale(Args.SiteLang if Args.SiteLang else 'en'),
Minify=Args.Minify if Args.Minify else 'None')
os.system("cp -R Assets/* public/")
if __name__ == '__main__':
Parser = argparse.ArgumentParser()
Parser.add_argument('--SiteLang', type=str)
Parser.add_argument('--SiteRoot', type=str)
Parser.add_argument('--FolderRoots', type=str)
Parser.add_argument('--ContextParts', type=str)
Args = Parser.parse_args()
Main(Args)
Parser.add_argument('--Minify', type=str)
Main(
Args=Parser.parse_args())

View File

@ -0,0 +1,30 @@
"""
Copyright (c) 2013, Dave Mankoff
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Dave Mankoff nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
from .main import minify, Minifier
__version__ = '0.1.12'

175
Source/Libs/htmlmin/command.py Executable file
View File

@ -0,0 +1,175 @@
#!/usr/bin/env python
"""
Copyright (c) 2013, Dave Mankoff
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Dave Mankoff nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import argparse
import codecs
import locale
import io
import sys
#import htmlmin
from . import Minifier
# Command-line interface definition for the htmlmin tool.
# Fixed user-visible typos in the help strings ("betwen" -> "between",
# "propietary" -> "proprietary", "unmininfied" -> "unminified", and the
# dropped word in "if and only if a newline character occurs").
parser = argparse.ArgumentParser(
  description='Minify HTML',
  formatter_class=argparse.RawTextHelpFormatter
)

parser.add_argument('input_file',
  nargs='?',
  metavar='INPUT',
  help='File path to html file to minify. Defaults to stdin.',
)
parser.add_argument('output_file',
  nargs='?',
  metavar='OUTPUT',
  help="File path to output to. Defaults to stdout.",
)
parser.add_argument('-c', '--remove-comments',
  help=(
'''When set, comments will be removed. They can be kept on an individual basis
by starting them with a '!': <!--! comment -->. The '!' will be removed from
the final output. If you want a '!' as the leading character of your comment,
put two of them: <!--!! comment -->.

'''),
  action='store_true')
parser.add_argument('-s', '--remove-empty-space',
  help=(
'''When set, this removes empty space between tags in certain cases.
Specifically, it will remove empty space if and only if a newline
character occurs within the space. Thus, code like
'<span>x</span> <span>y</span>' will be left alone, but code such as
'   ...
  </head>
  <body>
  ...'
will become '...</head><body>...'. Note that this CAN break your
html if you spread two inline tags over two lines. Use with caution.

'''),
  action='store_true')
parser.add_argument('--remove-all-empty-space',
  help=(
'''When set, this removes ALL empty space between tags. WARNING: this can and
likely will cause unintended consequences. For instance, '<i>X</i> <i>Y</i>'
will become '<i>X</i><i>Y</i>'. Putting whitespace along with other text will
avoid this problem. Only use if you are confident in the result. Whitespace is
not removed from inside of tags, thus '<span> </span>' will be left alone.

'''),
  action='store_true')
parser.add_argument('--keep-optional-attribute-quotes',
  help=(
'''When set, this keeps all attribute quotes, even if they are optional.

'''),
  action='store_true')
parser.add_argument('-H', '--in-head',
  help=(
'''If you are parsing only a fragment of HTML, and the fragment occurs in the
head of the document, setting this will remove some extra whitespace.

'''),
  action='store_true')
parser.add_argument('-k', '--keep-pre-attr',
  help=(
'''HTMLMin supports the proprietary attribute 'pre' that can be added to elements
to prevent minification. This attribute is removed by default. Set this flag to
keep the 'pre' attributes in place.

'''),
  action='store_true')
parser.add_argument('-a', '--pre-attr',
  help=(
'''The attribute htmlmin looks for to find blocks of HTML that it should not
minify. This attribute will be removed from the HTML unless '-k' is
specified. Defaults to 'pre'.

'''),
  default='pre')
parser.add_argument('-p', '--pre-tags',
  metavar='TAG',
  help=(
'''By default, the contents of 'pre', and 'textarea' tags are left unminified.
You can specify different tags using the --pre-tags option. 'script' and 'style'
tags are always left unminified.

'''),
  nargs='*',
  default=['pre', 'textarea'])
parser.add_argument('-e', '--encoding',
  help=("Encoding to read and write with. Default 'utf-8'."
        " When reading from stdin, attempts to use the system's"
        " encoding before defaulting to utf-8.\n\n"),
  default=None,
)
def main():
  """Entry point: read HTML from a file or stdin, minify, write to a file or stdout.

  Options come from the module-level ``parser``; the minifier is configured
  from them and fed line by line.
  """
  args = parser.parse_args()
  minifier = Minifier(
    remove_comments=args.remove_comments,
    remove_empty_space=args.remove_empty_space,
    remove_optional_attribute_quotes=not args.keep_optional_attribute_quotes,
    pre_tags=args.pre_tags,
    keep_pre=args.keep_pre_attr,
    pre_attr=args.pre_attr,
  )
  default_encoding = args.encoding or 'utf-8'

  if args.input_file:
    inp = codecs.open(args.input_file, encoding=default_encoding)
  else:
    encoding = args.encoding or sys.stdin.encoding \
        or locale.getpreferredencoding() or default_encoding
    # closefd=False: wrap stdin without taking ownership of its descriptor.
    inp = io.open(sys.stdin.fileno(), encoding=encoding, closefd=False)
  # Bug fix: the input handle was never closed; also stream the file instead
  # of materializing all lines with readlines().
  with inp:
    for line in inp:
      minifier.input(line)

  if args.output_file:
    # Bug fix: the output handle was never closed, so buffered output could
    # be lost if the interpreter exited without flushing.
    with codecs.open(
        args.output_file, 'w', encoding=default_encoding) as out:
      out.write(minifier.output)
  else:
    encoding = args.encoding or sys.stdout.encoding \
        or locale.getpreferredencoding() or default_encoding
    # closefd=False keeps fd 1 open; the with-block still flushes the wrapper.
    with io.open(sys.stdout.fileno(), 'w', encoding=encoding,
                 closefd=False) as out:
      out.write(minifier.output)

if __name__ == '__main__':
  main()

View File

@ -0,0 +1,64 @@
"""
Copyright (c) 2013, Dave Mankoff
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Dave Mankoff nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
from .main import Minifier
def htmlmin(*args, **kwargs):
  """Minifies HTML that is returned by a function.

  A simple decorator that minifies the HTML output of any function that it
  decorates. It supports all the same options that :class:`htmlmin.minify`
  has. With no options, it uses ``minify``'s default settings::

    @htmlmin
    def foobar():
       return '   minify me!   '

  or::

    @htmlmin(remove_comments=True)
    def foobar():
       return '   minify me!  <!-- and remove me! -->'
  """
  def _decorator(fn):
    # One Minifier per decorated function; its bound minify is reused per call.
    minify = Minifier(**kwargs).minify
    def wrapper(*a, **kw):
      return minify(fn(*a, **kw))
    return wrapper

  if len(args) == 1:
    if callable(args[0]) and not kwargs:
      # Bare @htmlmin usage: the single argument is the decorated function.
      return _decorator(args[0])
    else:
      # Bug fix: the message previously read "does accept", inverting its meaning.
      raise RuntimeError(
          'htmlmin decorator does not accept positional arguments')
  elif len(args) > 1:
    raise RuntimeError(
        'htmlmin decorator does not accept positional arguments')
  else:
    # @htmlmin(...) usage: return the real decorator.
    return _decorator

View File

@ -0,0 +1,204 @@
"""
Copyright (c) 2015, Dave Mankoff
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Dave Mankoff nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import re
try:
from html import escape
except ImportError:
from cgi import escape
import re
# Quote-style codes returned by escape_attr_value.
NO_QUOTES = 0
SINGLE_QUOTE = 1
DOUBLE_QUOTE = 2

# Character-code bounds used when scanning character references.
UPPER_A = ord('A')
UPPER_F = ord('F')
UPPER_Z = ord('Z')
LOWER_A = ord('a')
LOWER_F = ord('f')
LOWER_Z = ord('z')
ZERO = ord('0')
NINE = ord('9')

# https://www.w3.org/TR/html5/syntax.html#attributes-0
# Characters whose presence forces an attribute value to be quoted.
CHARS_TO_QUOTE_RE = re.compile(u'[\x20\x09\x0a\x0c\x0d=><`]')
def escape_tag(val):
  """HTML-escape text appearing in tag context (delegates to html.escape)."""
  return escape(val)
def escape_attr_name(val):
  """HTML-escape an attribute name (delegates to html.escape)."""
  return escape(val)
def escape_attr_value(val, double_quote=False):
  """Escape an attribute value and choose how it should be quoted.

  Returns a tuple ``(escaped_value, quote_code)`` where ``quote_code`` is one
  of NO_QUOTES, SINGLE_QUOTE or DOUBLE_QUOTE. With ``double_quote=True`` the
  value is always double-quoted; otherwise the style that needs the fewest
  escapes wins, and quotes are dropped entirely only when the value is
  non-empty and contains no character from CHARS_TO_QUOTE_RE.
  """
  val = escape_ambiguous_ampersand(val)
  # Removed dead local `has_html_tag`: it was computed but never used.
  if double_quote:
    return (val.replace('"', '&#34;'), DOUBLE_QUOTE)

  # Count both quote kinds to pick the cheaper quoting style.
  double_quote_count = 0
  single_quote_count = 0
  for ch in val:
    if ch == '"':
      double_quote_count += 1
    elif ch == "'":
      single_quote_count += 1
  if double_quote_count > single_quote_count:
    return (val.replace("'", '&#39;'), SINGLE_QUOTE)
  elif single_quote_count:
    return (val.replace('"', '&#34;'), DOUBLE_QUOTE)

  if not val or CHARS_TO_QUOTE_RE.search(val):
    return (val, DOUBLE_QUOTE)
  return (val, NO_QUOTES)
def escape_ambiguous_ampersand(val):
  """Escape '&' characters that would otherwise parse as character references.

  Scans val with a four-state machine (plain text / after '&' / after '&#' /
  after '&#x'). A '&' whose following text forms a complete named, decimal,
  or hex reference body terminated by ';' is escaped to '&amp;'; ampersands
  that cannot be confused with a reference pass through unchanged.
  """
  # TODO: this function could probably be made a lot faster.
  if not '&' in val:  # short circuit for speed
    return val

  state = 0      # 0: text, 1: after '&', 2: after '&#', 3: after '&#x'
  result = []    # output characters
  amp_buff = []  # candidate reference body collected since the last '&'
  for c in val:
    if state == 0:  # beginning
      if c == '&':
        state = 1
      else:
        result.append(c)
    elif state == 1:  # ampersand
      ord_c = ord(c)
      if (UPPER_A <= ord_c <= UPPER_Z or
          LOWER_A <= ord_c <= LOWER_Z or
          ZERO <= ord_c <= NINE):
        amp_buff.append(c)  # TODO: use "name character references" section
        # https://html.spec.whatwg.org/multipage/syntax.html#named-character-references
      elif c == '#':
        state = 2
      elif c == ';':
        # ';' closes the reference: escape only if a body was collected.
        if amp_buff:
          result.append('&')
          result.extend(amp_buff)
          result.append(';')
        else:
          result.append('&;')
        state = 0
        amp_buff = []
      elif c == '&':
        # A new '&' restarts collection; flush what was pending first.
        if amp_buff:
          result.append('&amp;')
          result.extend(amp_buff)
        else:
          result.append('&')
        amp_buff = []
      else:
        result.append('&')
        result.extend(amp_buff)
        result.append(c)
        state = 0
        amp_buff = []
    elif state == 2:  # numeric character reference
      ord_c = ord(c)
      if c == 'x' or c == 'X':
        state = 3
      elif ZERO <= ord_c <= NINE:
        amp_buff.append(c)
      elif c == ';':
        if amp_buff:
          result.append('&#')
          result.extend(amp_buff)
          result.append(';')
        else:
          result.append('&#;')
        state = 0
        amp_buff = []
      elif c == '&':
        if amp_buff:
          result.append('&amp;#')
          result.extend(amp_buff)
        else:
          result.append('&#')
        state = 1
        amp_buff = []
      else:
        if amp_buff:
          result.append('&amp;#')
          result.extend(amp_buff)
          result.append(c)
        else:
          result.append('&#')
          result.append(c)
        state = 0
        amp_buff = []
    elif state == 3:  # hex character reference
      ord_c = ord(c)
      if (UPPER_A <= ord_c <= UPPER_F or
          LOWER_A <= ord_c <= LOWER_F or
          ZERO <= ord_c <= NINE):
        amp_buff.append(c)
      elif c == ';':
        if amp_buff:
          result.append('&#x')
          result.extend(amp_buff)
          result.append(';')
        else:
          result.append('&#x;')
        state = 0
        amp_buff = []
      elif c == '&':
        if amp_buff:
          result.append('&amp;#x')
          result.extend(amp_buff)
        else:
          result.append('&#x')
        state = 1
        amp_buff = []
      else:
        if amp_buff:
          result.append('&amp;#x')
          result.extend(amp_buff)
          result.append(c)
        else:
          result.append('&#x')
          result.append(c)
        state = 0
        amp_buff = []

  # Input ended mid-reference: flush whatever prefix was pending, escaped.
  if state == 1:
    result.append('&amp;')
    result.extend(amp_buff)
  elif state == 2:
    result.append('&amp;#')
    result.extend(amp_buff)
  elif state == 3:
    result.append('&amp;#x')
    result.extend(amp_buff)

  return ''.join(result)

193
Source/Libs/htmlmin/main.py Normal file
View File

@ -0,0 +1,193 @@
"""
Copyright (c) 2013, Dave Mankoff
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Dave Mankoff nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
import cgi
from . import parser
def minify(input,
           remove_comments=False,
           remove_empty_space=False,
           remove_all_empty_space=False,
           reduce_empty_attributes=True,
           reduce_boolean_attributes=False,
           remove_optional_attribute_quotes=True,
           convert_charrefs=True,
           keep_pre=False,
           pre_tags=parser.PRE_TAGS,
           pre_attr='pre',
           cls=parser.HTMLMinParser):
  """Minifies HTML in one shot.

  :param input: A string containing the HTML to be minified.
  :param remove_comments: Remove comments found in HTML. Individual comments can
    be maintained by putting a ``!`` as the first character inside the comment.
    Thus::

       <!-- FOO --> <!--! BAR -->

    Will become simply::

       <!-- BAR -->

    The added exclamation is removed.
  :param remove_empty_space: Remove empty space found in HTML between an opening
    and a closing tag and when it contains a newline or carriage return. If
    whitespace is found that is only spaces and/or tabs, it will be turned into
    a single space. Be careful, this can have unintended consequences.
  :param remove_all_empty_space: A more extreme version of
    ``remove_empty_space``, this removes all empty whitespace found between
    tags. This is almost guaranteed to break your HTML unless you are very
    careful.
  :param reduce_boolean_attributes: Where allowed by the HTML5 specification,
    attributes such as 'disabled' and 'readonly' will have their value removed,
    so 'disabled="true"' will simply become 'disabled'. This is generally a
    good option to turn on except when JavaScript relies on the values.
  :param remove_optional_attribute_quotes: When True, optional quotes around
    attributes are removed. When False, all attribute quotes are left intact.
    Defaults to True.
  :param convert_charrefs: Decode character references such as &amp; and &#46;
    to their single character values where safe. This currently only applies to
    attributes. Data content between tags will be left encoded.
  :param keep_pre: By default, htmlmin uses the special attribute ``pre`` to
    allow you to demarcate areas of HTML that should not be minified. It removes
    this attribute as it finds it. Setting this value to ``True`` tells htmlmin
    to leave the attribute in the output.
  :param pre_tags: A list of tag names that should never be minified. You are
    free to change this list as you see fit, but you will probably want to
    include ``pre`` and ``textarea`` if you make any changes to the list. Note
    that ``<script>`` and ``<style>`` tags are never minimized.
  :param pre_attr: Specifies the attribute that, when found in an HTML tag,
    indicates that the content of the tag should not be minified. Defaults to
    ``pre``. You can also prefix individual tag attributes with
    ``{pre_attr}-`` to prevent the contents of the individual attribute from
    being changed.
  :return: A string containing the minified HTML.

  If you are going to be minifying multiple HTML documents, each with the same
  settings, consider using :class:`.Minifier`.
  """
  # One-shot use: build a parser, feed the whole document, return its result.
  minifier = cls(
    remove_comments=remove_comments,
    remove_empty_space=remove_empty_space,
    remove_all_empty_space=remove_all_empty_space,
    reduce_empty_attributes=reduce_empty_attributes,
    reduce_boolean_attributes=reduce_boolean_attributes,
    remove_optional_attribute_quotes=remove_optional_attribute_quotes,
    convert_charrefs=convert_charrefs,
    keep_pre=keep_pre,
    pre_tags=pre_tags,
    pre_attr=pre_attr)
  minifier.feed(input)
  minifier.close()
  return minifier.result
class Minifier(object):
"""An object that supports HTML Minification.
Options are passed into this class at initialization time and are then
persisted across each use of the instance. If you are going to be minifying
multiple peices of HTML, this will be more efficient than using
:class:`htmlmin.minify`.
See :class:`htmlmin.minify` for an explanation of options.
"""
def __init__(self,
remove_comments=False,
remove_empty_space=False,
remove_all_empty_space=False,
reduce_empty_attributes=True,
reduce_boolean_attributes=False,
remove_optional_attribute_quotes=True,
convert_charrefs=True,
keep_pre=False,
pre_tags=parser.PRE_TAGS,
pre_attr='pre',
cls=parser.HTMLMinParser):
"""Initialize the Minifier.
See :class:`htmlmin.minify` for an explanation of options.
"""
self._parser = cls(
remove_comments=remove_comments,
remove_empty_space=remove_empty_space,
remove_all_empty_space=remove_all_empty_space,
reduce_empty_attributes=reduce_empty_attributes,
reduce_boolean_attributes=reduce_boolean_attributes,
remove_optional_attribute_quotes=remove_optional_attribute_quotes,
convert_charrefs=convert_charrefs,
keep_pre=keep_pre,
pre_tags=pre_tags,
pre_attr=pre_attr)
def minify(self, *input):
"""Runs HTML through the minifier in one pass.
:param input: HTML to be fed into the minimizer. Multiple chunks of HTML
can be provided, and they are fed in sequentially as if they were
concatenated.
:returns: A string containing the minified HTML.
This is the simplest way to use an existing ``Minifier`` instance. This
method takes in HTML and minfies it, returning the result. Note that this
method resets the internal state of the parser before it does any work. If
there is pending HTML in the buffers, it will be lost.
"""
self._parser.reset()
self.input(*input)
return self.finalize()
def input(self, *input):
"""Feed more HTML into the input stream
:param input: HTML to be fed into the minimizer. Multiple chunks of HTML
can be provided, and they are fed in sequentially as if they were
concatenated. You can also call this method multiple times to achieve
the same effect.
"""
for i in input:
self._parser.feed(i)
  @property
  def output(self):
    """Retrieve the minified output generated thus far.

    Reads the parser's accumulated result buffer as-is; unlike
    :meth:`finalize`, this neither flushes pending input nor resets state.
    """
    return self._parser.result
def finalize(self):
"""Finishes current input HTML and returns mininified result.
This method flushes any remaining input HTML and returns the minified
result. It resets the state of the internal parser in the process so that
new HTML can be minified. Be sure to call this method before you reuse
the ``Minifier`` instance on a new HTML document.
"""
self._parser.close()
result = self._parser.result
self._parser.reset()
return result

View File

@ -0,0 +1,92 @@
"""
Copyright (c) 2013, Dave Mankoff
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Dave Mankoff nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
from .main import Minifier
class HTMLMinMiddleware(object):
  """WSGI Middleware that minifies html on the way out.

  :param by_default: Specifies if minification should be turned on or off by
    default. Defaults to ``True``.
  :param keep_header: The middleware recognizes one custom HTTP header that
    can be used to turn minification on or off on a per-request basis:
    ``X-HTML-Min-Enable``. Setting the header to ``true`` will turn
    minification on; anything else will turn minification off. If
    ``by_default`` is set to ``False``, this header is how you would turn
    minification back on. The middleware, by default, removes the header from
    the output. Setting this to ``True`` leaves the header intact.
  :param debug: A quick setting to turn all minification off. The middleware
    is effectively bypassed.

  This simple middleware minifies any HTML content that passes through it. Any
  additional keyword arguments beyond the three settings the middleware has are
  passed on to the internal minifier. The documentation for the options can
  be found under :class:`htmlmin.minify`.
  """
  def __init__(self, app, by_default=True, keep_header=False,
               debug=False, **kwargs):
    self.app = app
    self.by_default = by_default
    self.debug = debug
    self.keep_header = keep_header
    self.minifier = Minifier(**kwargs)

  def __call__(self, environ, start_response):
    # In debug mode the middleware is a transparent pass-through.
    if self.debug:
      return self.app(environ, start_response)

    should_minify = []  # need to use a mutable object so we can change it
                        # in a different scope.

    def minified_start_response(status, headers, exc_info=None):
      # Decide from the response headers whether to minify, and strip the
      # control header from the outgoing response unless asked to keep it.
      should_minify.append(self.should_minify(headers))
      if not self.keep_header:
        headers = [(header, value) for header, value in
                   headers if header != 'X-HTML-Min-Enable']
      start_response(status, headers, exc_info)

    # Materialize the body first: start_response must have run (filling
    # should_minify) before we can inspect should_minify[0].
    html = [i for i in self.app(environ, minified_start_response)]
    if should_minify[0]:
      return [self.minifier.minify(*html)]
    return html

  def should_minify(self, headers):
    """Return True when the response is HTML that should be minified.

    Honors the ``X-HTML-Min-Enable`` override header and the ``by_default``
    setting; scans the header list once with early exit.
    """
    is_html = False
    flag_header = None
    for header, value in headers:
      # BUGFIX: compare only the media type, so responses such as
      # "text/html; charset=utf-8" are recognized as HTML too. The old
      # exact-equality test silently skipped any Content-Type with parameters.
      if (not is_html and header == 'Content-Type' and
          value.split(';', 1)[0].strip() == 'text/html'):
        is_html = True
        if flag_header is not None:
          break

      if flag_header is None and header == 'X-HTML-Min-Enable':
        flag_header = (value.lower() == 'true')
        if is_html:
          break

    return is_html and (
      (self.by_default and flag_header != False) or
      (not self.by_default and flag_header))

View File

@ -0,0 +1,408 @@
"""
Copyright (c) 2013, Dave Mankoff
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Dave Mankoff nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
from __future__ import unicode_literals
import logging
import sys
import re
from .python3html.parser import HTMLParser
from . import escape
# Whitespace handling: the five HTML "space characters".
# https://www.w3.org/TR/html5/single-page.html#space-character
HTML_SPACE_RE = re.compile('[\x20\x09\x0a\x0c\x0d]+')
HTML_ALL_SPACE_RE = re.compile('^[\x20\x09\x0a\x0c\x0d]+$')
HTML_LEADING_SPACE_RE = re.compile(
  '^[\x20\x09\x0a\x0c\x0d]+')
HTML_TRAILING_SPACE_RE = re.compile(
  '[\x20\x09\x0a\x0c\x0d]+$')
HTML_LEADING_TRAILING_SPACE_RE = re.compile(
  '(^[\x20\x09\x0a\x0c\x0d]+)|([\x20\x09\x0a\x0c\x0d]+$)')

# Tags whose textual content must never be collapsed.
PRE_TAGS = ('pre', 'textarea')  # styles and scripts are never minified

# Void elements that take no closing tag.
# http://www.w3.org/TR/html51/syntax.html#elements-0
NO_CLOSE_TAGS = ('area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img',
                 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track',
                 'wbr')

# Per-tag boolean attributes that can be emitted without a value when
# reduce_boolean_attributes is enabled; '*' applies to every tag.
# http://www.w3.org/TR/html51/index.html#attributes-1
BOOLEAN_ATTRIBUTES = {
  'audio': ('autoplay', 'controls', 'hidden', 'loop', 'muted',),
  'button': ('autofocus', 'disabled', 'formnovalidate', 'hidden',),
  'command': ('checked', 'disabled', 'hidden'),
  'dialog': ('hidden', 'open',),
  'fieldset': ('disabled', 'hidden',),
  'form': ('hidden', 'novalidate',),
  'iframe': ('hidden', 'seamless',),
  'img': ('hidden', 'ismap',),
  'input': ('autofocus', 'checked', 'disabled', 'formnovalidate', 'hidden',
            'multiple', 'readonly', 'required',),
  'keygen': ('autofocus', 'disabled', 'hidden',),
  'object': ('hidden', 'typesmustmatch',),
  'ol': ('hidden', 'reversed',),
  'optgroup': ('disabled', 'hidden',),
  'option': ('disabled', 'hidden', 'selected',),
  'script': ('async', 'defer', 'hidden',),
  'select': ('autofocus', 'disabled', 'hidden', 'multiple', 'required',),
  'style': ('hidden', 'scoped',),
  'textarea': ('autofocus', 'disabled', 'hidden', 'readonly', 'required',),
  'track': ('default', 'hidden', ),
  'video': ('autoplay', 'controls', 'hidden', 'loop', 'muted',),
  '*': ('hidden',),
}

# a list of tags and tags that they are closed by
TAG_SETS = {
  'li': ('li',),
  'dd': ('dd', 'dt'),
  'rp': ('rp', 'rt'),
  'p': ('address', 'article', 'aside', 'blockquote', 'dir', 'div', 'dl',
        'fieldset', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'header', 'hgroup', 'hr', 'menu', 'nav', 'ol', 'p', 'pre', 'section',
        'table', 'ul'),
  'optgroup': ('optgroup',),
  'option': ('option', 'optgroup'),
  'colgroup': '*',
  'tbody': ('tbody', 'tfoot'),
  'tfoot': ('tbody',),
  'tr': ('tr',),
  'td': ('td', 'th'),
}
# Tags that share the same implicit-close rules as an entry above.
TAG_SETS['dt'] = TAG_SETS['dd']
TAG_SETS['rt'] = TAG_SETS['rp']
TAG_SETS['thead'] = TAG_SETS['tbody']
TAG_SETS['th'] = TAG_SETS['td']

# Tag omission rules:
# http://www.w3.org/TR/html51/syntax.html#optional-tags
# Exception hierarchy: HTMLMinError is the package root; ParseError covers
# malformed-input failures; OpenTagNotFoundError is raised when a closing tag
# has no matching open tag on the stack (see _close_tags_up_to).
class HTMLMinError(Exception): pass
class ParseError(HTMLMinError): pass
class OpenTagNotFoundError(ParseError): pass
class HTMLMinParser(HTMLParser):
  """HTML parser whose handler callbacks build a minified copy of the input.

  Feed HTML with :meth:`feed`; read the minified markup from the ``result``
  property. ``_tag_stack`` holds open tags as ``(tag, start_pre, lang)``
  tuples, newest first; ``_in_pre_tag`` counts how many enclosing contexts
  must be preserved verbatim.
  """

  def __init__(self,
               remove_comments=False,
               remove_empty_space=False,
               remove_all_empty_space=False,
               reduce_empty_attributes=True,
               reduce_boolean_attributes=False,
               remove_optional_attribute_quotes=True,
               convert_charrefs=True,
               keep_pre=False,
               pre_tags=PRE_TAGS,
               pre_attr='pre'):
    # NOTE(review): this version test is False for a hypothetical Python 4.x;
    # works for all real 2.7/3.x releases. Charref conversion is disabled in
    # the base parser so this class can control it itself (see unescape()).
    if sys.version_info[0] >= 3 and sys.version_info[1] >= 4:
      # convert_charrefs is True by default in Python 3.5.0 and newer. It was
      # introduced in 3.4.
      HTMLParser.__init__(self, convert_charrefs=False)
    else:
      HTMLParser.__init__(self)
    self.keep_pre = keep_pre
    self.pre_tags = pre_tags
    self.remove_comments = remove_comments
    self.remove_empty_space = remove_empty_space
    self.remove_all_empty_space = remove_all_empty_space
    self.reduce_empty_attributes = reduce_empty_attributes
    self.reduce_boolean_attributes = reduce_boolean_attributes
    self.remove_optional_attribute_quotes = remove_optional_attribute_quotes
    self.convert_charrefs = convert_charrefs
    self.pre_attr = pre_attr
    self.reset()

  def _tag_lang(self):
    # lang attribute of the innermost open tag, or None when the stack is empty.
    return self._tag_stack[0][2] if self._tag_stack else None

  def build_tag(self, tag, attrs, close_tag):
    """Render a start (or self-closing) tag with minified attributes.

    Returns ``(has_pre, markup, lang)`` where ``has_pre`` signals the
    ``pre`` marker attribute was present and ``lang`` is the effective
    language for descendants.
    """
    has_pre = False
    if self.reduce_boolean_attributes:
      bool_attrs = BOOLEAN_ATTRIBUTES.get(tag, BOOLEAN_ATTRIBUTES['*'])
    else:
      bool_attrs = False
    lang = self._tag_lang()

    attrs = list(attrs)  # We're modifying it in place
    last_quoted = last_no_slash = i = -1
    for k, v in attrs:
      # Attributes prefixed with "<pre_attr>-" are emitted verbatim
      # (no escaping, no charref conversion) with the prefix stripped.
      pre_prefix = k.startswith("{}-".format(self.pre_attr))
      if pre_prefix:
        k = k[len(self.pre_attr)+1:]
      if k == self.pre_attr:
        has_pre = True
        if not self.keep_pre and not pre_prefix:
          continue

      if v and self.convert_charrefs and not pre_prefix:
        v = HTMLParser.unescape(self, v)

      # Drop a lang attribute that merely repeats the inherited language.
      if k == 'lang':
        lang = v
        if v == self._tag_lang():
          continue

      i += 1
      if not pre_prefix:
        k = escape.escape_attr_name(k)
      if (v is None or (not v and self.reduce_empty_attributes) or
          (bool_attrs and k in bool_attrs)):
        # For our use case, we treat boolean attributes as quoted because they
        # don't require space between them and "/>" in closing tags.
        attrs[i] = k
        last_quoted = i
      else:
        if pre_prefix:
          # Choose quoting for a verbatim value without altering it; only if
          # it contains both quote styles do we fall back to escaping.
          has_double_quotes = '"' in v
          has_single_quotes = "'" in v
          if not has_double_quotes:
            if not has_single_quotes and self.remove_optional_attribute_quotes:
              q = escape.NO_QUOTES
            else:
              q = escape.DOUBLE_QUOTE
          elif not has_single_quotes:
            q = escape.SINGLE_QUOTES
          else:
            logging.error('Unsafe content found in pre-attribute. Escaping.')
            (v, q) = escape.escape_attr_value(
              v, double_quote=not self.remove_optional_attribute_quotes)
        else:
          (v, q) = escape.escape_attr_value(
            v, double_quote=not self.remove_optional_attribute_quotes)
        if q == escape.NO_QUOTES:
          attrs[i] = '%s=%s' % (k, v)
          if v[-1] != '/':
            last_no_slash = i
        else:
          q = '"' if q == escape.DOUBLE_QUOTE else "'"
          attrs[i] = '%s=%s%s%s' % (k, q, v, q)
          last_quoted = i

    # Trim any attributes skipped by the `continue`s above.
    i += 1
    if i != len(attrs):
      del attrs[i:]

    # 1. If there are no attributes, no additional space is necessary.
    # 2. If last attribute is quoted, no additional space is necessary.
    # 3. Two things are happening here:
    #    a) according to the standard, <foo bar=baz/> should be treated as <foo
    #       bar="baz/"> so space is necessary if this is self-closing tag,
    #       however
    #    b) reportedly (https://github.com/mankyd/htmlmin/pull/12), older
    #       versions of WebKit interpret <foo bar=baz/> as self-closing tag so
    #       we need the space if the last argument ends with a slash.
    space_maybe = ''
    if attrs:
      needs_space = lambda last_attr: (last_attr[-1] not in '"\'' and
                                       (close_tag or last_attr[-1] == '/'))
      if needs_space(attrs[-1][-1]):
        # If moving attributes around can help, do it. Otherwise bite the
        # bullet and put the space in.
        i = last_no_slash if last_quoted == -1 else last_quoted
        if i == -1 or needs_space(attrs[i]):
          space_maybe = ' '
        else:
          attrs.append(attrs[i])
          del attrs[i]

    return has_pre, '<%s%s%s%s%s>' % (escape.escape_tag(tag),
                                      ' ' if attrs else '',
                                      ' '.join(attrs),
                                      space_maybe,
                                      '/' if close_tag else ''), lang

  def handle_decl(self, decl):
    # Drop a leading whitespace-only data chunk so the doctype starts clean.
    if (len(self._data_buffer) == 1 and
        HTML_SPACE_RE.match(self._data_buffer[0][0])):
      self._data_buffer = []
    self._data_buffer.append('<!' + decl + '>')
    self._after_doctype = True

  def _close_tags_up_to(self, tag):
    # Pop stack entries until `tag` is found; returns how many "pre" contexts
    # were closed so the caller can adjust self._in_pre_tag.
    num_pres = 0
    i = 0
    for i, t in enumerate(self._tag_stack):
      if t[1]:
        num_pres += 1
      if t[0] == tag:
        break

      # Only the html tag can close out everything. Put on the brakes if
      # we encounter a closing tag that we didn't recognize.
      if tag != 'html' and t[0] in ('body', 'html', 'head'):
        raise OpenTagNotFoundError()

    self._tag_stack = self._tag_stack[i+1:]

    return num_pres

  def handle_starttag(self, tag, attrs):
    self._after_doctype = False
    if tag == 'head':
      self._in_head = True
    elif self._in_head and tag == 'title':
      self._in_title = True
      self._title_newly_opened = True

    # Implicitly close open tags that this start tag terminates per TAG_SETS
    # (e.g. a new <li> closes a previous <li>).
    for t in self._tag_stack:
      closed_by_tags = TAG_SETS.get(t[0])
      if closed_by_tags and (closed_by_tags == '*' or tag in closed_by_tags):
        self._in_pre_tag -= self._close_tags_up_to(t[0])
        break

    has_pre, data, lang = self.build_tag(tag, attrs, False)
    start_pre = False
    if (has_pre or self._in_pre_tag > 0 or
        tag == 'script' or tag == 'style' or tag in self.pre_tags):
      self._in_pre_tag += 1
      start_pre = True

    self._tag_stack.insert(0, (tag, start_pre, lang))
    self._data_buffer.append(data)

  def handle_endtag(self, tag):
    # According to the spec, <p> tags don't get closed when a parent a
    # tag closes them. Here's some logic that addresses this.
    if tag == 'a':
      contains_p = False
      for i, t in enumerate(self._tag_stack):
        if t[0] == 'p':
          contains_p = True
        elif t[0] == 'a':
          break

      if contains_p:  # the p tag, and all its children should be left open
        a_tag = self._tag_stack.pop(i)
        if a_tag[1]:
          self._in_pre_tag -= 1
    else:
      if tag == 'head':
        # TODO: Did we know that we were in an head tag?! If not, we need to
        # reminify everything to remove extra spaces.
        self._in_head = False
      elif tag == 'title':
        self._in_title = False
        self._title_newly_opened = False
      try:
        self._in_pre_tag -= self._close_tags_up_to(tag)
      except OpenTagNotFoundError:
        # Some tags don't require a start tag. Most do. Either way, we leave
        # closing tags along since they affect output. For instance, a '</p>'
        # results in a '<p></p>' in Chrome.
        pass
    if tag not in NO_CLOSE_TAGS:
      self._data_buffer.extend(['</', escape.escape_tag(tag), '>'])

  def handle_startendtag(self, tag, attrs):
    self._after_doctype = False
    # Void elements get no trailing '/'; everything else keeps XHTML style.
    data = self.build_tag(tag, attrs, tag not in NO_CLOSE_TAGS)[1]
    self._data_buffer.append(data)

  def handle_comment(self, data):
    # Comments beginning with '!' or IE conditional comments ("[if ...") are
    # kept even when remove_comments is enabled; a leading '!' is stripped.
    if not self.remove_comments or re.match(r'^(?:!|\[if\s)', data):
      self._data_buffer.append('<!--{}-->'.format(
        data[1:] if len(data) and data[0] == '!' else data))

  def handle_data(self, data):
    if self._in_pre_tag > 0:
      # Inside a preserved context, text passes through untouched.
      self._data_buffer.append(data)
    else:
      # remove_all_empty_space matches everything. remove_empty_space only
      # matches if there's a newline involved.
      if self.remove_all_empty_space or self._in_head or self._after_doctype:
        if HTML_ALL_SPACE_RE.match(data):
          return
      elif (self.remove_empty_space and HTML_ALL_SPACE_RE.match(data) and
            ('\n' in data or '\r' in data)):
        return

      # if we're in the title, remove leading and trailing whitespace.
      # note that the title may be parsed in chunks if entityref's or charrefs
      # are encountered.
      if self._in_title:
        if self.__title_trailing_whitespace:
          self._data_buffer.append(' ')
        self.__title_trailing_whitespace = (
          HTML_ALL_SPACE_RE.match(data[-1]) is not None)
        if self._title_newly_opened:
          self._title_newly_opened = False
          data = HTML_LEADING_TRAILING_SPACE_RE.sub('', data)
        else:
          data = HTML_TRAILING_SPACE_RE.sub(
            '', HTML_LEADING_TRAILING_SPACE_RE.sub(' ', data))

      # Collapse every run of space characters to a single space.
      data = HTML_SPACE_RE.sub(' ', data)
      if not data:
        return

      if self._in_pre_tag == 0 and self._data_buffer:
        # If we're not in a pre block, its possible that we append two spaces
        # together, which we want to avoid. For instance, if we remove a comment
        # from between two blocks of text: a <!-- B --> c => a c.
        if data[0] == ' ' and self._data_buffer[-1][-1] == ' ':
          data = data[1:]
          if not data:
            return
      self._data_buffer.append(data)

  def handle_entityref(self, data):
    # Named references inside <title> need the deferred-space bookkeeping.
    if self._in_title:
      if not self._title_newly_opened and self.__title_trailing_whitespace:
        self._data_buffer.append(' ')
        self.__title_trailing_whitespace = False
      self._title_newly_opened = False
    self._data_buffer.append('&{};'.format(data))

  def handle_charref(self, data):
    # Numeric references inside <title> need the deferred-space bookkeeping.
    if self._in_title:
      if not self._title_newly_opened and self.__title_trailing_whitespace:
        self._data_buffer.append(' ')
        self.__title_trailing_whitespace = False
      self._title_newly_opened = False
    self._data_buffer.append('&#{};'.format(data))

  def handle_pi(self, data):
    # Processing instructions are passed through unchanged.
    self._data_buffer.append('<?' + data + '>')

  def unknown_decl(self, data):
    # Marked sections (e.g. CDATA) are passed through unchanged.
    self._data_buffer.append('<![' + data + ']>')

  def reset(self):
    # Clear all minifier state; also invoked from __init__.
    self._data_buffer = []
    self._in_pre_tag = 0
    self._in_head = False
    self._in_title = False
    self._after_doctype = False
    self._tag_stack = []
    self._title_newly_opened = False
    self.__title_trailing_whitespace = False
    HTMLParser.reset(self)

  def unescape(self, val):
    """Override this method so that we can handle char ref conversion ourself.
    """
    return val

  @property
  def result(self):
    # The minified document accumulated so far.
    return ''.join(self._data_buffer)

View File

@ -0,0 +1,139 @@
"""
General functions for HTML manipulation.
"""
import re as _re
# Load the HTML5 named-entity table. On Python 3 it ships in html.entities;
# on Python 2 we rebuild an equivalent table from htmlentitydefs.
try:
    from html.entities import html5 as _html5
    unichr = chr
except ImportError:
    import htmlentitydefs
    _html5 = {'apos;':u"'"}  # apos; is missing from the Python 2 table
    for k, v in htmlentitydefs.name2codepoint.iteritems():
        _html5[k + ';'] = unichr(v)

__all__ = ['escape', 'unescape']
def escape(s, quote=True):
    """
    Replace the special characters "&", "<" and ">" with HTML-safe sequences.

    If the optional flag *quote* is true (the default), the quotation mark
    characters, both double quote (") and single quote (') characters, are
    also translated.
    """
    # '&' must be translated first so later substitutions don't double-escape
    # the ampersands they introduce.
    substitutions = [('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')]
    if quote:
        substitutions.append(('"', '&quot;'))
        substitutions.append(("'", '&#x27;'))
    for char, entity in substitutions:
        s = s.replace(char, entity)
    return s
# Numeric character references that the HTML5 spec maps to replacement
# characters (mostly the Windows-1252 range 0x80-0x9f).
# see http://www.w3.org/TR/html5/syntax.html#tokenizing-character-references
_invalid_charrefs = {
    0x00: '\ufffd',  # REPLACEMENT CHARACTER
    0x0d: '\r',      # CARRIAGE RETURN
    0x80: '\u20ac',  # EURO SIGN
    0x81: '\x81',    # <control>
    0x82: '\u201a',  # SINGLE LOW-9 QUOTATION MARK
    0x83: '\u0192',  # LATIN SMALL LETTER F WITH HOOK
    0x84: '\u201e',  # DOUBLE LOW-9 QUOTATION MARK
    0x85: '\u2026',  # HORIZONTAL ELLIPSIS
    0x86: '\u2020',  # DAGGER
    0x87: '\u2021',  # DOUBLE DAGGER
    0x88: '\u02c6',  # MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: '\u2030',  # PER MILLE SIGN
    0x8a: '\u0160',  # LATIN CAPITAL LETTER S WITH CARON
    0x8b: '\u2039',  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: '\u0152',  # LATIN CAPITAL LIGATURE OE
    0x8d: '\x8d',    # <control>
    0x8e: '\u017d',  # LATIN CAPITAL LETTER Z WITH CARON
    0x8f: '\x8f',    # <control>
    0x90: '\x90',    # <control>
    0x91: '\u2018',  # LEFT SINGLE QUOTATION MARK
    0x92: '\u2019',  # RIGHT SINGLE QUOTATION MARK
    0x93: '\u201c',  # LEFT DOUBLE QUOTATION MARK
    0x94: '\u201d',  # RIGHT DOUBLE QUOTATION MARK
    0x95: '\u2022',  # BULLET
    0x96: '\u2013',  # EN DASH
    0x97: '\u2014',  # EM DASH
    0x98: '\u02dc',  # SMALL TILDE
    0x99: '\u2122',  # TRADE MARK SIGN
    0x9a: '\u0161',  # LATIN SMALL LETTER S WITH CARON
    0x9b: '\u203a',  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: '\u0153',  # LATIN SMALL LIGATURE OE
    0x9d: '\x9d',    # <control>
    0x9e: '\u017e',  # LATIN SMALL LETTER Z WITH CARON
    0x9f: '\u0178',  # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
# Code points that HTML5 says must be dropped entirely when produced by a
# numeric character reference (controls and permanent noncharacters).
_invalid_codepoints = {
    # 0x0001 to 0x0008
    0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
    # 0x000E to 0x001F
    0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
    0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    # 0x007F to 0x009F
    0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a,
    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    # 0xFDD0 to 0xFDEF
    0xfdd0, 0xfdd1, 0xfdd2, 0xfdd3, 0xfdd4, 0xfdd5, 0xfdd6, 0xfdd7, 0xfdd8,
    0xfdd9, 0xfdda, 0xfddb, 0xfddc, 0xfddd, 0xfdde, 0xfddf, 0xfde0, 0xfde1,
    0xfde2, 0xfde3, 0xfde4, 0xfde5, 0xfde6, 0xfde7, 0xfde8, 0xfde9, 0xfdea,
    0xfdeb, 0xfdec, 0xfded, 0xfdee, 0xfdef,
    # others
    0xb, 0xfffe, 0xffff, 0x1fffe, 0x1ffff, 0x2fffe, 0x2ffff, 0x3fffe, 0x3ffff,
    0x4fffe, 0x4ffff, 0x5fffe, 0x5ffff, 0x6fffe, 0x6ffff, 0x7fffe, 0x7ffff,
    0x8fffe, 0x8ffff, 0x9fffe, 0x9ffff, 0xafffe, 0xaffff, 0xbfffe, 0xbffff,
    0xcfffe, 0xcffff, 0xdfffe, 0xdffff, 0xefffe, 0xeffff, 0xffffe, 0xfffff,
    0x10fffe, 0x10ffff
}
def _replace_charref(s):
    # Substitution callback for _charref: `s` is the regex match and group(1)
    # is the reference text without the leading '&'.
    s = s.group(1)
    if s[0] == '#':
        # numeric charref
        if s[1] in 'xX':
            num = int(s[2:].rstrip(';'), 16)
        else:
            num = int(s[1:].rstrip(';'))
        if num in _invalid_charrefs:
            # HTML5 maps these (mostly 0x80-0x9f) to replacement characters.
            return _invalid_charrefs[num]
        if 0xD800 <= num <= 0xDFFF or num > 0x10FFFF:
            # Surrogates and out-of-range code points become U+FFFD.
            return '\uFFFD'
        if num in _invalid_codepoints:
            return ''
        return unichr(num)
    else:
        # named charref
        if s in _html5:
            return _html5[s]
        # find the longest matching name (as defined by the standard)
        for x in range(len(s)-1, 1, -1):
            if s[:x] in _html5:
                return _html5[s[:x]] + s[x:]
        else:
            # No entity name matched; keep the text verbatim.
            return '&' + s
# Matches decimal, hexadecimal, and named character references; the trailing
# ';' is optional per the HTML5 tokenizer's error-recovery rules.
_charref = _re.compile(r'&(#[0-9]+;?'
                       r'|#[xX][0-9a-fA-F]+;?'
                       r'|[^\t\n\f <&#;]{1,32};?)')
def unescape(s):
    """
    Convert all named and numeric character references (e.g. &gt;, &#62;,
    &#x3e;) in the string s to the corresponding unicode characters.

    This function uses the rules defined by the HTML 5 standard
    for both valid and invalid character references, and the list of
    HTML 5 named character references defined in html.entities.html5.
    """
    # Fast path: strings without an ampersand contain no references at all.
    if '&' in s:
        s = _charref.sub(_replace_charref, s)
    return s

View File

@ -0,0 +1,481 @@
"""A parser for HTML and XHTML."""
########
# This is copied from Python3 and the slightly modified to support needed
# features. The original file can be found at:
# https://github.com/python/cpython/blob/44b548dda872c0d4f30afd6b44fd74b053a55ad8/Lib/html/parser.py
#
# The largest difference is the reinstatement of the unescape method in
# HTMLParser, which is needed for features in htmlmin. Changes are also
# made to ensure Python 2.7 compatibility.
########
# This file is based on sgmllib.py, but the API is slightly different.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).
import re
import warnings
try:
import _markupbase as markupbase
except ImportError:
import markupbase
from . import unescape
__all__ = ['HTMLParser']
# Regular expressions used for parsing
interesting_normal = re.compile('[&<]')
incomplete = re.compile('&[a-zA-Z#]')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
# 1) if you change tagfind/attrfind remember to update locatestarttagend too;
# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will
# explode, so don't do it.
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
attrfind_tolerant = re.compile(
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
locatestarttagend_tolerant = re.compile(r"""
<[a-zA-Z][^\t\n\r\f />\x00]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
(?:\s*,)* # possibly followed by a comma
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile('>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
class HTMLParser(markupbase.ParserBase):
    """Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    """

    # Elements whose content is raw text terminated only by a matching end tag.
    CDATA_CONTENT_ELEMENTS = ("script", "style")

    def __init__(self, convert_charrefs=True):
        """Initialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        """
        self.convert_charrefs = convert_charrefs
        self.reset()
    def reset(self):
        """Reset this instance.  Loses all unprocessed data."""
        self.rawdata = ''          # unconsumed input buffer
        self.lasttag = '???'       # most recent start tag seen
        self.interesting = interesting_normal
        self.cdata_elem = None     # name of the open raw-text element, if any
        markupbase.ParserBase.reset(self)
    def feed(self, data):
        r"""Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').  Unconsumed input is buffered
        across calls.
        """
        self.rawdata = self.rawdata + data
        self.goahead(0)
    def close(self):
        """Handle any buffered data as if followed by end-of-file."""
        self.goahead(1)
    # Raw text of the most recently parsed start tag (set by parse_starttag).
    __starttag_text = None

    def get_starttag_text(self):
        """Return full source of start tag: '<...>'."""
        return self.__starttag_text
    def set_cdata_mode(self, elem):
        # Switch to raw-text mode: only the matching end tag is "interesting".
        self.cdata_elem = elem.lower()
        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
    def clear_cdata_mode(self):
        # Leave raw-text mode and resume normal tokenization.
        self.interesting = interesting_normal
        self.cdata_elem = None
    # Internal -- handle data as far as reasonable.  May leave state
    # and data to be processed by a subsequent call.  If 'end' is
    # true, force handling all data as if followed by EOF marker.
    # This is the tokenizer's main loop: it walks rawdata dispatching to
    # parse_* helpers for markup and handle_data/charref/entityref for text.
    def goahead(self, end):
        rawdata = self.rawdata
        i = 0
        n = len(rawdata)
        while i < n:
            if self.convert_charrefs and not self.cdata_elem:
                j = rawdata.find('<', i)
                if j < 0:
                    # if we can't find the next <, either we are at the end
                    # or there's more text incoming.  If the latter is True,
                    # we can't pass the text to handle_data in case we have
                    # a charref cut in half at end.  Try to determine if
                    # this is the case before proceeding by looking for an
                    # & near the end and see if it's followed by a space or ;.
                    amppos = rawdata.rfind('&', max(i, n-34))
                    if (amppos >= 0 and
                        not re.compile(r'[\s;]').search(rawdata, amppos)):
                        break  # wait till we get all the text
                    j = n
            else:
                match = self.interesting.search(rawdata, i)  # < or &
                if match:
                    j = match.start()
                else:
                    if self.cdata_elem:
                        break
                    j = n
            if i < j:
                # Emit the plain-text run before the next markup character.
                if self.convert_charrefs and not self.cdata_elem:
                    self.handle_data(self.unescape(rawdata[i:j]))
                else:
                    self.handle_data(rawdata[i:j])
            i = self.updatepos(i, j)
            if i == n: break
            startswith = rawdata.startswith
            if startswith('<', i):
                if starttagopen.match(rawdata, i):  # < + letter
                    k = self.parse_starttag(i)
                elif startswith("</", i):
                    k = self.parse_endtag(i)
                elif startswith("<!--", i):
                    k = self.parse_comment(i)
                elif startswith("<?", i):
                    k = self.parse_pi(i)
                elif startswith("<!", i):
                    k = self.parse_html_declaration(i)
                elif (i + 1) < n:
                    self.handle_data("<")
                    k = i + 1
                else:
                    break
                if k < 0:
                    # Construct was incomplete; at EOF, salvage what we can
                    # as plain data up to the next '>' or '<'.
                    if not end:
                        break
                    k = rawdata.find('>', i + 1)
                    if k < 0:
                        k = rawdata.find('<', i + 1)
                        if k < 0:
                            k = i + 1
                    else:
                        k += 1
                    if self.convert_charrefs and not self.cdata_elem:
                        self.handle_data(self.unescape(rawdata[i:k]))
                    else:
                        self.handle_data(rawdata[i:k])
                i = self.updatepos(i, k)
            elif startswith("&#", i):
                match = charref.match(rawdata, i)
                if match:
                    name = match.group()[2:-1]
                    self.handle_charref(name)
                    k = match.end()
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                else:
                    if ";" in rawdata[i:]:  # bail by consuming &#
                        self.handle_data(rawdata[i:i+2])
                        i = self.updatepos(i, i+2)
                    break
            elif startswith('&', i):
                match = entityref.match(rawdata, i)
                if match:
                    name = match.group(1)
                    self.handle_entityref(name)
                    k = match.end()
                    if not startswith(';', k-1):
                        k = k - 1
                    i = self.updatepos(i, k)
                    continue
                match = incomplete.match(rawdata, i)
                if match:
                    # match.group() will contain at least 2 chars
                    if end and match.group() == rawdata[i:]:
                        k = match.end()
                        if k <= i:
                            k = n
                        i = self.updatepos(i, i + 1)
                    # incomplete
                    break
                elif (i + 1) < n:
                    # not the end of the buffer, and can't be confused
                    # with some other construct
                    self.handle_data("&")
                    i = self.updatepos(i, i + 1)
                else:
                    break
            else:
                assert 0, "interesting.search() lied"
        # end while
        if end and i < n and not self.cdata_elem:
            # Flush whatever text remains at end-of-input.
            if self.convert_charrefs and not self.cdata_elem:
                self.handle_data(self.unescape(rawdata[i:n]))
            else:
                self.handle_data(rawdata[i:n])
            i = self.updatepos(i, n)
        self.rawdata = rawdata[i:]
    # Internal -- parse html declarations, return length or -1 if not terminated
    # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
    # See also parse_declaration in _markupbase
    def parse_html_declaration(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == '<!', ('unexpected call to '
                                        'parse_html_declaration()')
        if rawdata[i:i+4] == '<!--':
            # this case is actually already handled in goahead()
            return self.parse_comment(i)
        elif rawdata[i:i+3] == '<![':
            # marked section, e.g. <![CDATA[...]]>
            return self.parse_marked_section(i)
        elif rawdata[i:i+9].lower() == '<!doctype':
            # find the closing >
            gtpos = rawdata.find('>', i+9)
            if gtpos == -1:
                return -1
            self.handle_decl(rawdata[i+2:gtpos])
            return gtpos+1
        else:
            return self.parse_bogus_comment(i)
    # Internal -- parse bogus comment, return length or -1 if not terminated
    # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
    def parse_bogus_comment(self, i, report=1):
        rawdata = self.rawdata
        assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
                                                'parse_comment()')
        pos = rawdata.find('>', i+2)
        if pos == -1:
            return -1
        if report:
            # Malformed declarations/end tags surface as comments per HTML5.
            self.handle_comment(rawdata[i+2:pos])
        return pos + 1
    # Internal -- parse processing instr, return end or -1 if not terminated
    def parse_pi(self, i):
        rawdata = self.rawdata
        assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
        match = piclose.search(rawdata, i+2)  # >
        if not match:
            return -1
        j = match.start()
        # Everything between '<?' and the closing '>' is the PI payload.
        self.handle_pi(rawdata[i+2: j])
        j = match.end()
        return j
    # Internal -- handle starttag, return end or -1 if not terminated
    def parse_starttag(self, i):
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind_tolerant.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = match.group(1).lower()
        while k < endpos:
            m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Attribute with no '=' at all: value is None.
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Strip matching surrounding quotes from the value.
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # Malformed tail: compute the position (kept for parity with the
            # original warning code) and emit the raw text as data instead.
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            self.handle_data(rawdata[i:endpos])
            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos
    # Internal -- check to see if we have a complete starttag; return end
    # or -1 if incomplete.
    def check_for_whole_start_tag(self, i):
        rawdata = self.rawdata
        m = locatestarttagend_tolerant.match(rawdata, i)
        if m:
            j = m.end()
            next = rawdata[j:j+1]
            if next == ">":
                return j + 1
            if next == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                if rawdata.startswith("/", j):
                    # buffer boundary
                    return -1
                # else bogus input
                if j > i:
                    return j
                else:
                    return i + 1
            if next == "":
                # end of input
                return -1
            if next in ("abcdefghijklmnopqrstuvwxyz=/"
                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            if j > i:
                return j
            else:
                return i + 1
        raise AssertionError("we should not get here!")
    # Internal -- parse endtag, return end or -1 if incomplete
    def parse_endtag(self, i):
        """Parse an end tag starting at index *i*.

        Returns the index just past the tag, or -1 when the buffer ends
        before the tag does.  Inside a CDATA element (<script>/<style>),
        only the matching end tag terminates raw-text mode; anything else
        is reported as character data.
        """
        rawdata = self.rawdata
        assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
        match = endendtag.search(rawdata, i+1) # >
        if not match:
            return -1
        gtpos = match.end()
        match = endtagfind.match(rawdata, i) # </ + tag + >
        if not match:
            if self.cdata_elem is not None:
                # In raw-text mode a malformed end tag is just data.
                self.handle_data(rawdata[i:gtpos])
                return gtpos
            # find the name: w3.org/TR/html5/tokenization.html#tag-name-state
            namematch = tagfind_tolerant.match(rawdata, i+2)
            if not namematch:
                # w3.org/TR/html5/tokenization.html#end-tag-open-state
                if rawdata[i:i+3] == '</>':
                    return i+3
                else:
                    return self.parse_bogus_comment(i)
            tagname = namematch.group(1).lower()
            # consume and ignore other stuff between the name and the >
            # Note: this is not 100% correct, since we might have things like
            # </tag attr=">">, but looking for > after the name should cover
            # most of the cases and is much simpler
            gtpos = rawdata.find('>', namematch.end())
            self.handle_endtag(tagname)
            return gtpos+1
        elem = match.group(1).lower() # script or style
        if self.cdata_elem is not None:
            if elem != self.cdata_elem:
                # A different end tag does not leave raw-text mode.
                self.handle_data(rawdata[i:gtpos])
                return gtpos
        self.handle_endtag(elem.lower())
        self.clear_cdata_mode()
        return gtpos
    # Overridable -- finish processing of start+end tag: <tag.../>
    def handle_startendtag(self, tag, attrs):
        """Handle an XHTML-style empty element (e.g. <br/>).

        Default implementation treats it as a start tag immediately
        followed by the matching end tag.
        """
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)
    # Overridable -- handle start tag
    def handle_starttag(self, tag, attrs):
        """Called for each start tag; *tag* is lowercased and *attrs* is a
        list of (name, value) pairs.  Default implementation does nothing."""
        pass
    # Overridable -- handle end tag
    def handle_endtag(self, tag):
        """Called for each end tag; *tag* is lowercased.  Default
        implementation does nothing."""
        pass
    # Overridable -- handle character reference
    def handle_charref(self, name):
        """Called for numeric character references (&#...;); *name* is the
        digits between '&#' and ';'.  Default implementation does nothing."""
        pass
    # Overridable -- handle entity reference
    def handle_entityref(self, name):
        """Called for named entity references (&name;); *name* excludes the
        '&' and ';'.  Default implementation does nothing."""
        pass
    # Overridable -- handle data
    def handle_data(self, data):
        """Called for runs of character data between tags.  Default
        implementation does nothing."""
        pass
    # Overridable -- handle comment
    def handle_comment(self, data):
        """Called for comments; *data* is the text between '<!--' and
        '-->'.  Default implementation does nothing."""
        pass
    # Overridable -- handle declaration
    def handle_decl(self, decl):
        """Called for declarations such as <!DOCTYPE html>; *decl* is the
        text between '<!' and '>'.  Default implementation does nothing."""
        pass
    # Overridable -- handle processing instruction
    def handle_pi(self, data):
        """Called for processing instructions; *data* is the text between
        '<?' and '>'.  Default implementation does nothing."""
        pass
    # Overridable -- handle unrecognized declaration
    def unknown_decl(self, data):
        """Called for declarations the parser does not recognize.  Default
        implementation does nothing."""
        pass
    # Internal -- helper to remove special character quoting
    def unescape(self, s):
        """Return *s* with HTML character references replaced; delegates
        to the module-level unescape() function."""
        return unescape(s)