commit 58880ac27ac45d0da48924cbb567b1f32a52d24a Author: octospacc Date: Mon May 16 20:12:06 2022 +0200 First working generator diff --git a/Assets/Style.css b/Assets/Style.css new file mode 100755 index 0000000..8be203f --- /dev/null +++ b/Assets/Style.css @@ -0,0 +1,44 @@ +:root { + --ScreenBorderPadding: 4px; +} + +* { + Box-Sizing: Border-Box; +} + +Body { + Margin: 0; + Padding: var(--ScreenBorderPadding); + Box-Sizing: Border-Box; + Width: 100vw; + Height: 100vh; +} +#Container { + Box-Sizing: Border-Box; +} + +#TopBox { + Border: 2px Solid Purple; + Padding: 8px; +} +#LeftBox { + Float: Left; + //Border: 2px Solid Purple; +} +#RightBox { + Position: Inherit; + Top: 16px; + Right: 16px; + Float: Right; +} +#MainBox { + Text-Align: Center; + Margin: Auto; +} +#BottomBox { + Box-Sizing: Border-Box; + //Border: 2px Solid Purple; + Position: Fixed; + Bottom: var(--ScreenBorderPadding); + Width: 100vw; +} diff --git a/Build.py b/Build.py new file mode 100755 index 0000000..394fb79 --- /dev/null +++ b/Build.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +import os +import shutil +from pathlib import Path +from Libs.markdown import Markdown + +def ReadFile(p): + try: + with open(p, 'r') as f: + return f.read() + except Exception: + print("Error reading file {}".format(p)) + return None + +def WriteFile(p, c): + try: + with open(p, 'w') as f: + f.write(c) + return True + except Exception: + print("Error writing file {}".format(p)) + return False + +def ResetPublic(): + try: + shutil.rmtree('public') + except FileNotFoundError: + pass +# os.mkdir('public') + +def GetTitles(c): + Titles = [] + + return Titles + +def FormatTitles(Titles): + return '' + +""" +def GetAssociations(): + f = ReadFile('Associations.txt') + Templates = f.splitlines() + Uses = {} + for t in Templates: + t = t.split(' ') + Uses.update({t[0][:-1]: t[1:]}) + print(Uses) + return Uses + +def LoadTemplates(): + Templates = {} + for p in Path('Templates').rglob('*.html'): + p = str(p)[len('Templates')+1:] + Templates.update({p: ReadFile('Parts/{}'.format(p))}) + return Templates + + Templates = {} + for t in Associations: + Templates.update({t: ReadFile('Templates/{}'.format(t))}) + return Templates + +def LoadParts(): + Parts = {} + for p in Path('Parts').rglob('*.html'): + p = str(p)[len('Parts')+1:] + Parts.update({p: ReadFile('Parts/{}'.format(p))}) + return Parts +""" + +def LoadFromDir(Dir): + Contents = {} + for File in Path(Dir).rglob('*.html'): + File = str(File)[len(Dir)+1:] + Contents.update({File: ReadFile('{}/{}'.format(Dir, File))}) + return Contents + +def PreProcessor(File): + File = ReadFile(File) + Content, Titles, Meta = '', [], { + 'Template': 'Standard.html', + 'Background': '', + 'Style': ''} + for l in File.splitlines(): + ls = l.lstrip() + if ls.startswith('\%'): + Content += ls[1:] + '\n' + elif ls.startswith('% - '): + Content += ''.format(ls[4:]) + '\n' + elif ls.startswith('% Template: '): + Meta['Template'] = ls[len('% Template: '):] + elif ls.startswith('% Background: '): + Meta['Background'] = ls[len('% Background: '):] + elif ls.startswith('% Style: '): + Meta['Style'] += ls[len('% Style: '):] + ' ' + else: + Content += l + '\n' + Heading = ls.split(' ')[0].count('#') + if Heading > 0: + Titles += [ls] + return Content, Titles, Meta + +""" +def MetaProcessor(Meta): + Meta[] + return Meta +""" + +def PatchHTML(Template, Parts, Content, Titles, Meta): + HTMLTitles = FormatTitles(Titles) + + Template = Template.replace('[HTML:Page:Title]', 'Untitled' if not Titles else Titles[0]) + Template = Template.replace('[HTML:Page:Style]', Meta['Style']) + Template = Template.replace('[HTML:Page:RightBox]', str(Titles)) + Template = Template.replace('[HTML:Page:MainBox]', Content) + + for p in Parts: + Template = Template.replace('[HTML:Part:{}]'.format(p), Parts[p]) + print(Template) + return Template + +def MakeSite(Templates, Parts): + Pages = [] + for File in Path('Pages').rglob('*.md'): + File = str(File)[len('Pages/'):] + Content, Titles, Meta = PreProcessor('Pages/{}'.format(File)) + + Template = Templates[Meta['Template']] + Template = Template.replace( + 'Style.css', + '{}Style.css'.format('../'*File.count('/'))) + + print(Content, Titles, Meta) + WriteFile( + 'public/{}.html'.format(File.rstrip('.md')), + PatchHTML( + Template, + Parts, + Markdown().convert(Content), + Titles, + Meta)) + +def IgnoreFiles(dir, files): + return [f for f in files if os.path.isfile(os.path.join(dir, f))] +def CopyAssets(): + shutil.copytree('Pages', 'public', ignore=IgnoreFiles) +# for File in Path('Assets').rglob('*'): +# File = str(File) +# shutil.copy(File, 'public/{}'.format(File[len('Assets/'):])) + os.system("cp -R Assets/* public/") + +def Main(): + os.chdir(os.path.dirname(os.path.abspath(__file__))) + ResetPublic() +# Associations = GetAssociations() + Templates = LoadFromDir('Templates') #LoadTemplates(Associations) + Parts = LoadFromDir('Parts') #LoadParts() + print(Templates, Parts) + CopyAssets() + MakeSite(Templates, Parts) + +if __name__ == '__main__': + Main() diff --git a/Build.sh b/Build.sh new file mode 100755 index 0000000..e41ae43 --- /dev/null +++ b/Build.sh @@ -0,0 +1,10 @@ +#!/bin/sh +cd "$( dirname "$( realpath "$0" )" )" + +mkdir -p public + +cp -R Pages/* public/ +find public -type f -name "*.html" -exec sh -c 'cat Templates/Standard.html > "$1"' _ {} \; +find public -type f -name "*.html" -print0 | xargs -0 sed -i -e 's/\[HTML\:PageTitle\]/ERRE/g' + +cp Style.css public/ diff --git a/Libs/markdown/__init__.py b/Libs/markdown/__init__.py new file mode 100644 index 0000000..e05af10 --- /dev/null +++ b/Libs/markdown/__init__.py @@ -0,0 +1,61 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import sys + +# TODO: Remove this check at some point in the future. +# (also remove flake8's 'ignore E402' comments below) +if sys.version_info[0] < 3: # pragma: no cover + raise ImportError('A recent version of Python 3 is required.') + +from .core import Markdown, markdown, markdownFromFile # noqa: E402 +from .util import PY37 # noqa: E402 +from .pep562 import Pep562 # noqa: E402 +from .__meta__ import __version__, __version_info__ # noqa: E402 +import warnings # noqa: E402 + +# For backward compatibility as some extensions expect it... +from .extensions import Extension # noqa + +__all__ = ['Markdown', 'markdown', 'markdownFromFile'] + +__deprecated__ = { + "version": ("__version__", __version__), + "version_info": ("__version_info__", __version_info__) +} + + +def __getattr__(name): + """Get attribute.""" + + deprecated = __deprecated__.get(name) + if deprecated: + warnings.warn( + "'{}' is deprecated. Use '{}' instead.".format(name, deprecated[0]), + category=DeprecationWarning, + stacklevel=(3 if PY37 else 4) + ) + return deprecated[1] + raise AttributeError("module '{}' has no attribute '{}'".format(__name__, name)) + + +if not PY37: + Pep562(__name__) diff --git a/Libs/markdown/__main__.py b/Libs/markdown/__main__.py new file mode 100644 index 0000000..0184008 --- /dev/null +++ b/Libs/markdown/__main__.py @@ -0,0 +1,151 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import sys +import optparse +import codecs +import warnings +import markdown +try: + # We use `unsafe_load` because users may need to pass in actual Python + # objects. As this is only available from the CLI, the user has much + # worse problems if an attacker can use this as an attach vector. + from yaml import unsafe_load as yaml_load +except ImportError: # pragma: no cover + try: + # Fall back to PyYAML <5.1 + from yaml import load as yaml_load + except ImportError: + # Fall back to JSON + from json import load as yaml_load + +import logging +from logging import DEBUG, WARNING, CRITICAL + +logger = logging.getLogger('MARKDOWN') + + +def parse_options(args=None, values=None): + """ + Define and parse `optparse` options for command-line usage. + """ + usage = """%prog [options] [INPUTFILE] + (STDIN is assumed if no INPUTFILE is given)""" + desc = "A Python implementation of John Gruber's Markdown. " \ + "https://Python-Markdown.github.io/" + ver = "%%prog %s" % markdown.__version__ + + parser = optparse.OptionParser(usage=usage, description=desc, version=ver) + parser.add_option("-f", "--file", dest="filename", default=None, + help="Write output to OUTPUT_FILE. Defaults to STDOUT.", + metavar="OUTPUT_FILE") + parser.add_option("-e", "--encoding", dest="encoding", + help="Encoding for input and output files.",) + parser.add_option("-o", "--output_format", dest="output_format", + default='xhtml', metavar="OUTPUT_FORMAT", + help="Use output format 'xhtml' (default) or 'html'.") + parser.add_option("-n", "--no_lazy_ol", dest="lazy_ol", + action='store_false', default=True, + help="Observe number of first item of ordered lists.") + parser.add_option("-x", "--extension", action="append", dest="extensions", + help="Load extension EXTENSION.", metavar="EXTENSION") + parser.add_option("-c", "--extension_configs", + dest="configfile", default=None, + help="Read extension configurations from CONFIG_FILE. " + "CONFIG_FILE must be of JSON or YAML format. YAML " + "format requires that a python YAML library be " + "installed. The parsed JSON or YAML must result in a " + "python dictionary which would be accepted by the " + "'extension_configs' keyword on the markdown.Markdown " + "class. The extensions must also be loaded with the " + "`--extension` option.", + metavar="CONFIG_FILE") + parser.add_option("-q", "--quiet", default=CRITICAL, + action="store_const", const=CRITICAL+10, dest="verbose", + help="Suppress all warnings.") + parser.add_option("-v", "--verbose", + action="store_const", const=WARNING, dest="verbose", + help="Print all warnings.") + parser.add_option("--noisy", + action="store_const", const=DEBUG, dest="verbose", + help="Print debug messages.") + + (options, args) = parser.parse_args(args, values) + + if len(args) == 0: + input_file = None + else: + input_file = args[0] + + if not options.extensions: + options.extensions = [] + + extension_configs = {} + if options.configfile: + with codecs.open( + options.configfile, mode="r", encoding=options.encoding + ) as fp: + try: + extension_configs = yaml_load(fp) + except Exception as e: + message = "Failed parsing extension config file: %s" % \ + options.configfile + e.args = (message,) + e.args[1:] + raise + + opts = { + 'input': input_file, + 'output': options.filename, + 'extensions': options.extensions, + 'extension_configs': extension_configs, + 'encoding': options.encoding, + 'output_format': options.output_format, + 'lazy_ol': options.lazy_ol + } + + return opts, options.verbose + + +def run(): # pragma: no cover + """Run Markdown from the command line.""" + + # Parse options and adjust logging level if necessary + options, logging_level = parse_options() + if not options: + sys.exit(2) + logger.setLevel(logging_level) + console_handler = logging.StreamHandler() + logger.addHandler(console_handler) + if logging_level <= WARNING: + # Ensure deprecation warnings get displayed + warnings.filterwarnings('default') + logging.captureWarnings(True) + warn_logger = logging.getLogger('py.warnings') + warn_logger.addHandler(console_handler) + + # Run + markdown.markdownFromFile(**options) + + +if __name__ == '__main__': # pragma: no cover + # Support running module as a commandline command. + # `python -m markdown [options] [args]`. + run() diff --git a/Libs/markdown/__meta__.py b/Libs/markdown/__meta__.py new file mode 100644 index 0000000..84884b6 --- /dev/null +++ b/Libs/markdown/__meta__.py @@ -0,0 +1,49 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +# __version_info__ format: +# (major, minor, patch, dev/alpha/beta/rc/final, #) +# (1, 1, 2, 'dev', 0) => "1.1.2.dev0" +# (1, 1, 2, 'alpha', 1) => "1.1.2a1" +# (1, 2, 0, 'beta', 2) => "1.2b2" +# (1, 2, 0, 'rc', 4) => "1.2rc4" +# (1, 2, 0, 'final', 0) => "1.2" +__version_info__ = (3, 3, 7, 'final', 0) + + +def _get_version(version_info): + " Returns a PEP 440-compliant version number from version_info. " + assert len(version_info) == 5 + assert version_info[3] in ('dev', 'alpha', 'beta', 'rc', 'final') + + parts = 2 if version_info[2] == 0 else 3 + v = '.'.join(map(str, version_info[:parts])) + + if version_info[3] == 'dev': + v += '.dev' + str(version_info[4]) + elif version_info[3] != 'final': + mapping = {'alpha': 'a', 'beta': 'b', 'rc': 'rc'} + v += mapping[version_info[3]] + str(version_info[4]) + + return v + + +__version__ = _get_version(__version_info__) diff --git a/Libs/markdown/__pycache__/__init__.cpython-310.pyc b/Libs/markdown/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..3598f4c Binary files /dev/null and b/Libs/markdown/__pycache__/__init__.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/__meta__.cpython-310.pyc b/Libs/markdown/__pycache__/__meta__.cpython-310.pyc new file mode 100644 index 0000000..6eba0c0 Binary files /dev/null and b/Libs/markdown/__pycache__/__meta__.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/blockparser.cpython-310.pyc b/Libs/markdown/__pycache__/blockparser.cpython-310.pyc new file mode 100644 index 0000000..c49592b Binary files /dev/null and b/Libs/markdown/__pycache__/blockparser.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/blockprocessors.cpython-310.pyc b/Libs/markdown/__pycache__/blockprocessors.cpython-310.pyc new file mode 100644 index 0000000..d321339 Binary files /dev/null and b/Libs/markdown/__pycache__/blockprocessors.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/core.cpython-310.pyc b/Libs/markdown/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000..e1c9c22 Binary files /dev/null and b/Libs/markdown/__pycache__/core.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/htmlparser.cpython-310.pyc b/Libs/markdown/__pycache__/htmlparser.cpython-310.pyc new file mode 100644 index 0000000..6f35303 Binary files /dev/null and b/Libs/markdown/__pycache__/htmlparser.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/inlinepatterns.cpython-310.pyc b/Libs/markdown/__pycache__/inlinepatterns.cpython-310.pyc new file mode 100644 index 0000000..36311d0 Binary files /dev/null and b/Libs/markdown/__pycache__/inlinepatterns.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/pep562.cpython-310.pyc b/Libs/markdown/__pycache__/pep562.cpython-310.pyc new file mode 100644 index 0000000..78221da Binary files /dev/null and b/Libs/markdown/__pycache__/pep562.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/postprocessors.cpython-310.pyc b/Libs/markdown/__pycache__/postprocessors.cpython-310.pyc new file mode 100644 index 0000000..7b6bb74 Binary files /dev/null and b/Libs/markdown/__pycache__/postprocessors.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/preprocessors.cpython-310.pyc b/Libs/markdown/__pycache__/preprocessors.cpython-310.pyc new file mode 100644 index 0000000..0e3a9c5 Binary files /dev/null and b/Libs/markdown/__pycache__/preprocessors.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/serializers.cpython-310.pyc b/Libs/markdown/__pycache__/serializers.cpython-310.pyc new file mode 100644 index 0000000..e20d038 Binary files /dev/null and b/Libs/markdown/__pycache__/serializers.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/treeprocessors.cpython-310.pyc b/Libs/markdown/__pycache__/treeprocessors.cpython-310.pyc new file mode 100644 index 0000000..3337ab0 Binary files /dev/null and b/Libs/markdown/__pycache__/treeprocessors.cpython-310.pyc differ diff --git a/Libs/markdown/__pycache__/util.cpython-310.pyc b/Libs/markdown/__pycache__/util.cpython-310.pyc new file mode 100644 index 0000000..078af9d Binary files /dev/null and b/Libs/markdown/__pycache__/util.cpython-310.pyc differ diff --git a/Libs/markdown/blockparser.py b/Libs/markdown/blockparser.py new file mode 100644 index 0000000..39219fd --- /dev/null +++ b/Libs/markdown/blockparser.py @@ -0,0 +1,125 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import xml.etree.ElementTree as etree +from . import util + + +class State(list): + """ Track the current and nested state of the parser. + + This utility class is used to track the state of the BlockParser and + support multiple levels if nesting. It's just a simple API wrapped around + a list. Each time a state is set, that state is appended to the end of the + list. Each time a state is reset, that state is removed from the end of + the list. + + Therefore, each time a state is set for a nested block, that state must be + reset when we back out of that level of nesting or the state could be + corrupted. + + While all the methods of a list object are available, only the three + defined below need be used. + + """ + + def set(self, state): + """ Set a new state. """ + self.append(state) + + def reset(self): + """ Step back one step in nested state. """ + self.pop() + + def isstate(self, state): + """ Test that top (current) level is of given state. """ + if len(self): + return self[-1] == state + else: + return False + + +class BlockParser: + """ Parse Markdown blocks into an ElementTree object. + + A wrapper class that stitches the various BlockProcessors together, + looping through them and creating an ElementTree object. + """ + + def __init__(self, md): + self.blockprocessors = util.Registry() + self.state = State() + self.md = md + + @property + @util.deprecated("Use 'md' instead.") + def markdown(self): + # TODO: remove this later + return self.md + + def parseDocument(self, lines): + """ Parse a markdown document into an ElementTree. + + Given a list of lines, an ElementTree object (not just a parent + Element) is created and the root element is passed to the parser + as the parent. The ElementTree object is returned. + + This should only be called on an entire document, not pieces. + + """ + # Create a ElementTree from the lines + self.root = etree.Element(self.md.doc_tag) + self.parseChunk(self.root, '\n'.join(lines)) + return etree.ElementTree(self.root) + + def parseChunk(self, parent, text): + """ Parse a chunk of markdown text and attach to given etree node. + + While the ``text`` argument is generally assumed to contain multiple + blocks which will be split on blank lines, it could contain only one + block. Generally, this method would be called by extensions when + block parsing is required. + + The ``parent`` etree Element passed in is altered in place. + Nothing is returned. + + """ + self.parseBlocks(parent, text.split('\n\n')) + + def parseBlocks(self, parent, blocks): + """ Process blocks of markdown text and attach to given etree node. + + Given a list of ``blocks``, each blockprocessor is stepped through + until there are no blocks left. While an extension could potentially + call this method directly, it's generally expected to be used + internally. + + This is a public method as an extension may need to add/alter + additional BlockProcessors which call this method to recursively + parse a nested block. + + """ + while blocks: + for processor in self.blockprocessors: + if processor.test(parent, blocks[0]): + if processor.run(parent, blocks) is not False: + # run returns True or None + break diff --git a/Libs/markdown/blockprocessors.py b/Libs/markdown/blockprocessors.py new file mode 100644 index 0000000..3d0ff86 --- /dev/null +++ b/Libs/markdown/blockprocessors.py @@ -0,0 +1,623 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). + +CORE MARKDOWN BLOCKPARSER +=========================================================================== + +This parser handles basic parsing of Markdown blocks. It doesn't concern +itself with inline elements such as **bold** or *italics*, but rather just +catches blocks, lists, quotes, etc. + +The BlockParser is made up of a bunch of BlockProcessors, each handling a +different type of block. Extensions may add/replace/remove BlockProcessors +as they need to alter how markdown blocks are parsed. +""" + +import logging +import re +import xml.etree.ElementTree as etree +from . import util +from .blockparser import BlockParser + +logger = logging.getLogger('MARKDOWN') + + +def build_block_parser(md, **kwargs): + """ Build the default block parser used by Markdown. """ + parser = BlockParser(md) + parser.blockprocessors.register(EmptyBlockProcessor(parser), 'empty', 100) + parser.blockprocessors.register(ListIndentProcessor(parser), 'indent', 90) + parser.blockprocessors.register(CodeBlockProcessor(parser), 'code', 80) + parser.blockprocessors.register(HashHeaderProcessor(parser), 'hashheader', 70) + parser.blockprocessors.register(SetextHeaderProcessor(parser), 'setextheader', 60) + parser.blockprocessors.register(HRProcessor(parser), 'hr', 50) + parser.blockprocessors.register(OListProcessor(parser), 'olist', 40) + parser.blockprocessors.register(UListProcessor(parser), 'ulist', 30) + parser.blockprocessors.register(BlockQuoteProcessor(parser), 'quote', 20) + parser.blockprocessors.register(ReferenceProcessor(parser), 'reference', 15) + parser.blockprocessors.register(ParagraphProcessor(parser), 'paragraph', 10) + return parser + + +class BlockProcessor: + """ Base class for block processors. + + Each subclass will provide the methods below to work with the source and + tree. Each processor will need to define it's own ``test`` and ``run`` + methods. The ``test`` method should return True or False, to indicate + whether the current block should be processed by this processor. If the + test passes, the parser will call the processors ``run`` method. + + """ + + def __init__(self, parser): + self.parser = parser + self.tab_length = parser.md.tab_length + + def lastChild(self, parent): + """ Return the last child of an etree element. """ + if len(parent): + return parent[-1] + else: + return None + + def detab(self, text, length=None): + """ Remove a tab from the front of each line of the given text. """ + if length is None: + length = self.tab_length + newtext = [] + lines = text.split('\n') + for line in lines: + if line.startswith(' ' * length): + newtext.append(line[length:]) + elif not line.strip(): + newtext.append('') + else: + break + return '\n'.join(newtext), '\n'.join(lines[len(newtext):]) + + def looseDetab(self, text, level=1): + """ Remove a tab from front of lines but allowing dedented lines. """ + lines = text.split('\n') + for i in range(len(lines)): + if lines[i].startswith(' '*self.tab_length*level): + lines[i] = lines[i][self.tab_length*level:] + return '\n'.join(lines) + + def test(self, parent, block): + """ Test for block type. Must be overridden by subclasses. + + As the parser loops through processors, it will call the ``test`` + method on each to determine if the given block of text is of that + type. This method must return a boolean ``True`` or ``False``. The + actual method of testing is left to the needs of that particular + block type. It could be as simple as ``block.startswith(some_string)`` + or a complex regular expression. As the block type may be different + depending on the parent of the block (i.e. inside a list), the parent + etree element is also provided and may be used as part of the test. + + Keywords: + + * ``parent``: A etree element which will be the parent of the block. + * ``block``: A block of text from the source which has been split at + blank lines. + """ + pass # pragma: no cover + + def run(self, parent, blocks): + """ Run processor. Must be overridden by subclasses. + + When the parser determines the appropriate type of a block, the parser + will call the corresponding processor's ``run`` method. This method + should parse the individual lines of the block and append them to + the etree. + + Note that both the ``parent`` and ``etree`` keywords are pointers + to instances of the objects which should be edited in place. Each + processor must make changes to the existing objects as there is no + mechanism to return new/different objects to replace them. + + This means that this method should be adding SubElements or adding text + to the parent, and should remove (``pop``) or add (``insert``) items to + the list of blocks. + + Keywords: + + * ``parent``: A etree element which is the parent of the current block. + * ``blocks``: A list of all remaining blocks of the document. + """ + pass # pragma: no cover + + +class ListIndentProcessor(BlockProcessor): + """ Process children of list items. + + Example: + * a list item + process this part + + or this part + + """ + + ITEM_TYPES = ['li'] + LIST_TYPES = ['ul', 'ol'] + + def __init__(self, *args): + super().__init__(*args) + self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length) + + def test(self, parent, block): + return block.startswith(' '*self.tab_length) and \ + not self.parser.state.isstate('detabbed') and \ + (parent.tag in self.ITEM_TYPES or + (len(parent) and parent[-1] is not None and + (parent[-1].tag in self.LIST_TYPES))) + + def run(self, parent, blocks): + block = blocks.pop(0) + level, sibling = self.get_level(parent, block) + block = self.looseDetab(block, level) + + self.parser.state.set('detabbed') + if parent.tag in self.ITEM_TYPES: + # It's possible that this parent has a 'ul' or 'ol' child list + # with a member. If that is the case, then that should be the + # parent. This is intended to catch the edge case of an indented + # list whose first member was parsed previous to this point + # see OListProcessor + if len(parent) and parent[-1].tag in self.LIST_TYPES: + self.parser.parseBlocks(parent[-1], [block]) + else: + # The parent is already a li. Just parse the child block. + self.parser.parseBlocks(parent, [block]) + elif sibling.tag in self.ITEM_TYPES: + # The sibling is a li. Use it as parent. + self.parser.parseBlocks(sibling, [block]) + elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES: + # The parent is a list (``ol`` or ``ul``) which has children. + # Assume the last child li is the parent of this block. + if sibling[-1].text: + # If the parent li has text, that text needs to be moved to a p + # The p must be 'inserted' at beginning of list in the event + # that other children already exist i.e.; a nested sublist. + p = etree.Element('p') + p.text = sibling[-1].text + sibling[-1].text = '' + sibling[-1].insert(0, p) + self.parser.parseChunk(sibling[-1], block) + else: + self.create_item(sibling, block) + self.parser.state.reset() + + def create_item(self, parent, block): + """ Create a new li and parse the block with it as the parent. """ + li = etree.SubElement(parent, 'li') + self.parser.parseBlocks(li, [block]) + + def get_level(self, parent, block): + """ Get level of indent based on list level. """ + # Get indent level + m = self.INDENT_RE.match(block) + if m: + indent_level = len(m.group(1))/self.tab_length + else: + indent_level = 0 + if self.parser.state.isstate('list'): + # We're in a tightlist - so we already are at correct parent. + level = 1 + else: + # We're in a looselist - so we need to find parent. + level = 0 + # Step through children of tree to find matching indent level. + while indent_level > level: + child = self.lastChild(parent) + if (child is not None and + (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)): + if child.tag in self.LIST_TYPES: + level += 1 + parent = child + else: + # No more child levels. If we're short of indent_level, + # we have a code block. So we stop here. + break + return level, parent + + +class CodeBlockProcessor(BlockProcessor): + """ Process code blocks. """ + + def test(self, parent, block): + return block.startswith(' '*self.tab_length) + + def run(self, parent, blocks): + sibling = self.lastChild(parent) + block = blocks.pop(0) + theRest = '' + if (sibling is not None and sibling.tag == "pre" and + len(sibling) and sibling[0].tag == "code"): + # The previous block was a code block. As blank lines do not start + # new code blocks, append this block to the previous, adding back + # linebreaks removed from the split into a list. + code = sibling[0] + block, theRest = self.detab(block) + code.text = util.AtomicString( + '{}\n{}\n'.format(code.text, util.code_escape(block.rstrip())) + ) + else: + # This is a new codeblock. Create the elements and insert text. + pre = etree.SubElement(parent, 'pre') + code = etree.SubElement(pre, 'code') + block, theRest = self.detab(block) + code.text = util.AtomicString('%s\n' % util.code_escape(block.rstrip())) + if theRest: + # This block contained unindented line(s) after the first indented + # line. Insert these lines as the first block of the master blocks + # list for future processing. + blocks.insert(0, theRest) + + +class BlockQuoteProcessor(BlockProcessor): + + RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)') + + def test(self, parent, block): + return bool(self.RE.search(block)) and not util.nearing_recursion_limit() + + def run(self, parent, blocks): + block = blocks.pop(0) + m = self.RE.search(block) + if m: + before = block[:m.start()] # Lines before blockquote + # Pass lines before blockquote in recursively for parsing first. + self.parser.parseBlocks(parent, [before]) + # Remove ``> `` from beginning of each line. + block = '\n'.join( + [self.clean(line) for line in block[m.start():].split('\n')] + ) + sibling = self.lastChild(parent) + if sibling is not None and sibling.tag == "blockquote": + # Previous block was a blockquote so set that as this blocks parent + quote = sibling + else: + # This is a new blockquote. Create a new parent element. + quote = etree.SubElement(parent, 'blockquote') + # Recursively parse block with blockquote as parent. + # change parser state so blockquotes embedded in lists use p tags + self.parser.state.set('blockquote') + self.parser.parseChunk(quote, block) + self.parser.state.reset() + + def clean(self, line): + """ Remove ``>`` from beginning of a line. """ + m = self.RE.match(line) + if line.strip() == ">": + return "" + elif m: + return m.group(2) + else: + return line + + +class OListProcessor(BlockProcessor): + """ Process ordered list blocks. """ + + TAG = 'ol' + # The integer (python string) with which the lists starts (default=1) + # Eg: If list is initialized as) + # 3. Item + # The ol tag will get starts="3" attribute + STARTSWITH = '1' + # Lazy ol - ignore startswith + LAZY_OL = True + # List of allowed sibling tags. + SIBLING_TAGS = ['ol', 'ul'] + + def __init__(self, parser): + super().__init__(parser) + # Detect an item (``1. item``). ``group(1)`` contains contents of item. + self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1)) + # Detect items on secondary lines. they can be of either list type. + self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' % + (self.tab_length - 1)) + # Detect indented (nested) items of either type + self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' % + (self.tab_length, self.tab_length * 2 - 1)) + + def test(self, parent, block): + return bool(self.RE.match(block)) + + def run(self, parent, blocks): + # Check fr multiple items in one block. + items = self.get_items(blocks.pop(0)) + sibling = self.lastChild(parent) + + if sibling is not None and sibling.tag in self.SIBLING_TAGS: + # Previous block was a list item, so set that as parent + lst = sibling + # make sure previous item is in a p- if the item has text, + # then it isn't in a p + if lst[-1].text: + # since it's possible there are other children for this + # sibling, we can't just SubElement the p, we need to + # insert it as the first item. + p = etree.Element('p') + p.text = lst[-1].text + lst[-1].text = '' + lst[-1].insert(0, p) + # if the last item has a tail, then the tail needs to be put in a p + # likely only when a header is not followed by a blank line + lch = self.lastChild(lst[-1]) + if lch is not None and lch.tail: + p = etree.SubElement(lst[-1], 'p') + p.text = lch.tail.lstrip() + lch.tail = '' + + # parse first block differently as it gets wrapped in a p. + li = etree.SubElement(lst, 'li') + self.parser.state.set('looselist') + firstitem = items.pop(0) + self.parser.parseBlocks(li, [firstitem]) + self.parser.state.reset() + elif parent.tag in ['ol', 'ul']: + # this catches the edge case of a multi-item indented list whose + # first item is in a blank parent-list item: + # * * subitem1 + # * subitem2 + # see also ListIndentProcessor + lst = parent + else: + # This is a new list so create parent with appropriate tag. + lst = etree.SubElement(parent, self.TAG) + # Check if a custom start integer is set + if not self.LAZY_OL and self.STARTSWITH != '1': + lst.attrib['start'] = self.STARTSWITH + + self.parser.state.set('list') + # Loop through items in block, recursively parsing each with the + # appropriate parent. + for item in items: + if item.startswith(' '*self.tab_length): + # Item is indented. Parse with last item as parent + self.parser.parseBlocks(lst[-1], [item]) + else: + # New item. Create li and parse with it as parent + li = etree.SubElement(lst, 'li') + self.parser.parseBlocks(li, [item]) + self.parser.state.reset() + + def get_items(self, block): + """ Break a block into list items. """ + items = [] + for line in block.split('\n'): + m = self.CHILD_RE.match(line) + if m: + # This is a new list item + # Check first item for the start index + if not items and self.TAG == 'ol': + # Detect the integer value of first list item + INTEGER_RE = re.compile(r'(\d+)') + self.STARTSWITH = INTEGER_RE.match(m.group(1)).group() + # Append to the list + items.append(m.group(3)) + elif self.INDENT_RE.match(line): + # This is an indented (possibly nested) item. + if items[-1].startswith(' '*self.tab_length): + # Previous item was indented. Append to that item. + items[-1] = '{}\n{}'.format(items[-1], line) + else: + items.append(line) + else: + # This is another line of previous item. Append to that item. + items[-1] = '{}\n{}'.format(items[-1], line) + return items + + +class UListProcessor(OListProcessor): + """ Process unordered list blocks. """ + + TAG = 'ul' + + def __init__(self, parser): + super().__init__(parser) + # Detect an item (``1. item``). ``group(1)`` contains contents of item. + self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1)) + + +class HashHeaderProcessor(BlockProcessor): + """ Process Hash Headers. """ + + # Detect a header at start of any line in block + RE = re.compile(r'(?:^|\n)(?P#{1,6})(?P
(?:\\.|[^\\])*?)#*(?:\n|$)') + + def test(self, parent, block): + return bool(self.RE.search(block)) + + def run(self, parent, blocks): + block = blocks.pop(0) + m = self.RE.search(block) + if m: + before = block[:m.start()] # All lines before header + after = block[m.end():] # All lines after header + if before: + # As the header was not the first line of the block and the + # lines before the header must be parsed first, + # recursively parse this lines as a block. + self.parser.parseBlocks(parent, [before]) + # Create header using named groups from RE + h = etree.SubElement(parent, 'h%d' % len(m.group('level'))) + h.text = m.group('header').strip() + if after: + # Insert remaining lines as first block for future parsing. + blocks.insert(0, after) + else: # pragma: no cover + # This should never happen, but just in case... + logger.warn("We've got a problem header: %r" % block) + + +class SetextHeaderProcessor(BlockProcessor): + """ Process Setext-style Headers. """ + + # Detect Setext-style header. Must be first 2 lines of block. + RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE) + + def test(self, parent, block): + return bool(self.RE.match(block)) + + def run(self, parent, blocks): + lines = blocks.pop(0).split('\n') + # Determine level. ``=`` is 1 and ``-`` is 2. + if lines[1].startswith('='): + level = 1 + else: + level = 2 + h = etree.SubElement(parent, 'h%d' % level) + h.text = lines[0].strip() + if len(lines) > 2: + # Block contains additional lines. Add to master blocks for later. + blocks.insert(0, '\n'.join(lines[2:])) + + +class HRProcessor(BlockProcessor): + """ Process Horizontal Rules. """ + + # Python's re module doesn't officially support atomic grouping. However you can fake it. + # See https://stackoverflow.com/a/13577411/866026 + RE = r'^[ ]{0,3}(?=(?P(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$' + # Detect hr on any line of a block. + SEARCH_RE = re.compile(RE, re.MULTILINE) + + def test(self, parent, block): + m = self.SEARCH_RE.search(block) + if m: + # Save match object on class instance so we can use it later. + self.match = m + return True + return False + + def run(self, parent, blocks): + block = blocks.pop(0) + match = self.match + # Check for lines in block before hr. + prelines = block[:match.start()].rstrip('\n') + if prelines: + # Recursively parse lines before hr so they get parsed first. + self.parser.parseBlocks(parent, [prelines]) + # create hr + etree.SubElement(parent, 'hr') + # check for lines in block after hr. + postlines = block[match.end():].lstrip('\n') + if postlines: + # Add lines after hr to master blocks for later parsing. + blocks.insert(0, postlines) + + +class EmptyBlockProcessor(BlockProcessor): + """ Process blocks that are empty or start with an empty line. """ + + def test(self, parent, block): + return not block or block.startswith('\n') + + def run(self, parent, blocks): + block = blocks.pop(0) + filler = '\n\n' + if block: + # Starts with empty line + # Only replace a single line. + filler = '\n' + # Save the rest for later. + theRest = block[1:] + if theRest: + # Add remaining lines to master blocks for later. + blocks.insert(0, theRest) + sibling = self.lastChild(parent) + if (sibling is not None and sibling.tag == 'pre' and + len(sibling) and sibling[0].tag == 'code'): + # Last block is a codeblock. Append to preserve whitespace. + sibling[0].text = util.AtomicString( + '{}{}'.format(sibling[0].text, filler) + ) + + +class ReferenceProcessor(BlockProcessor): + """ Process link references. """ + RE = re.compile( + r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE + ) + + def test(self, parent, block): + return True + + def run(self, parent, blocks): + block = blocks.pop(0) + m = self.RE.search(block) + if m: + id = m.group(1).strip().lower() + link = m.group(2).lstrip('<').rstrip('>') + title = m.group(5) or m.group(6) + self.parser.md.references[id] = (link, title) + if block[m.end():].strip(): + # Add any content after match back to blocks as separate block + blocks.insert(0, block[m.end():].lstrip('\n')) + if block[:m.start()].strip(): + # Add any content before match back to blocks as separate block + blocks.insert(0, block[:m.start()].rstrip('\n')) + return True + # No match. Restore block. + blocks.insert(0, block) + return False + + +class ParagraphProcessor(BlockProcessor): + """ Process Paragraph blocks. """ + + def test(self, parent, block): + return True + + def run(self, parent, blocks): + block = blocks.pop(0) + if block.strip(): + # Not a blank block. Add to parent, otherwise throw it away. + if self.parser.state.isstate('list'): + # The parent is a tight-list. + # + # Check for any children. This will likely only happen in a + # tight-list when a header isn't followed by a blank line. + # For example: + # + # * # Header + # Line 2 of list item - not part of header. + sibling = self.lastChild(parent) + if sibling is not None: + # Insetrt after sibling. + if sibling.tail: + sibling.tail = '{}\n{}'.format(sibling.tail, block) + else: + sibling.tail = '\n%s' % block + else: + # Append to parent.text + if parent.text: + parent.text = '{}\n{}'.format(parent.text, block) + else: + parent.text = block.lstrip() + else: + # Create a regular paragraph + p = etree.SubElement(parent, 'p') + p.text = block.lstrip() diff --git a/Libs/markdown/core.py b/Libs/markdown/core.py new file mode 100644 index 0000000..d8c8196 --- /dev/null +++ b/Libs/markdown/core.py @@ -0,0 +1,407 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import codecs +import sys +import logging +import importlib +from . import util +from .preprocessors import build_preprocessors +from .blockprocessors import build_block_parser +from .treeprocessors import build_treeprocessors +from .inlinepatterns import build_inlinepatterns +from .postprocessors import build_postprocessors +from .extensions import Extension +from .serializers import to_html_string, to_xhtml_string + +__all__ = ['Markdown', 'markdown', 'markdownFromFile'] + + +logger = logging.getLogger('MARKDOWN') + + +class Markdown: + """Convert Markdown to HTML.""" + + doc_tag = "div" # Element used to wrap document - later removed + + output_formats = { + 'html': to_html_string, + 'xhtml': to_xhtml_string, + } + + def __init__(self, **kwargs): + """ + Creates a new Markdown instance. + + Keyword arguments: + + * extensions: A list of extensions. + If an item is an instance of a subclass of `markdown.extension.Extension`, the instance will be used + as-is. If an item is of type string, first an entry point will be loaded. If that fails, the string is + assumed to use Python dot notation (`path.to.module:ClassName`) to load a markdown.Extension subclass. If + no class is specified, then a `makeExtension` function is called within the specified module. + * extension_configs: Configuration settings for extensions. + * output_format: Format of output. Supported formats are: + * "xhtml": Outputs XHTML style tags. Default. + * "html": Outputs HTML style tags. + * tab_length: Length of tabs in the source. Default: 4 + + """ + + self.tab_length = kwargs.get('tab_length', 4) + + self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']', + '(', ')', '>', '#', '+', '-', '.', '!'] + + self.block_level_elements = [ + # Elements which are invalid to wrap in a `

` tag. + # See https://w3c.github.io/html/grouping-content.html#the-p-element + 'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl', + 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', + 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol', + 'p', 'pre', 'section', 'table', 'ul', + # Other elements which Markdown should not be mucking up the contents of. + 'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'iframe', 'li', 'legend', + 'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script', + 'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video' + ] + + self.registeredExtensions = [] + self.docType = "" + self.stripTopLevelTags = True + + self.build_parser() + + self.references = {} + self.htmlStash = util.HtmlStash() + self.registerExtensions(extensions=kwargs.get('extensions', []), + configs=kwargs.get('extension_configs', {})) + self.set_output_format(kwargs.get('output_format', 'xhtml')) + self.reset() + + def build_parser(self): + """ Build the parser from the various parts. """ + self.preprocessors = build_preprocessors(self) + self.parser = build_block_parser(self) + self.inlinePatterns = build_inlinepatterns(self) + self.treeprocessors = build_treeprocessors(self) + self.postprocessors = build_postprocessors(self) + return self + + def registerExtensions(self, extensions, configs): + """ + Register extensions with this instance of Markdown. + + Keyword arguments: + + * extensions: A list of extensions, which can either + be strings or objects. + * configs: A dictionary mapping extension names to config options. + + """ + for ext in extensions: + if isinstance(ext, str): + ext = self.build_extension(ext, configs.get(ext, {})) + if isinstance(ext, Extension): + ext._extendMarkdown(self) + logger.debug( + 'Successfully loaded extension "%s.%s".' + % (ext.__class__.__module__, ext.__class__.__name__) + ) + elif ext is not None: + raise TypeError( + 'Extension "{}.{}" must be of type: "{}.{}"'.format( + ext.__class__.__module__, ext.__class__.__name__, + Extension.__module__, Extension.__name__ + ) + ) + return self + + def build_extension(self, ext_name, configs): + """ + Build extension from a string name, then return an instance. + + First attempt to load an entry point. The string name must be registered as an entry point in the + `markdown.extensions` group which points to a subclass of the `markdown.extensions.Extension` class. + If multiple distributions have registered the same name, the first one found is returned. + + If no entry point is found, assume dot notation (`path.to.module:ClassName`). Load the specified class and + return an instance. If no class is specified, import the module and call a `makeExtension` function and return + the Extension instance returned by that function. + """ + configs = dict(configs) + + entry_points = [ep for ep in util.INSTALLED_EXTENSIONS if ep.name == ext_name] + if entry_points: + ext = entry_points[0].load() + return ext(**configs) + + # Get class name (if provided): `path.to.module:ClassName` + ext_name, class_name = ext_name.split(':', 1) if ':' in ext_name else (ext_name, '') + + try: + module = importlib.import_module(ext_name) + logger.debug( + 'Successfully imported extension module "%s".' % ext_name + ) + except ImportError as e: + message = 'Failed loading extension "%s".' % ext_name + e.args = (message,) + e.args[1:] + raise + + if class_name: + # Load given class name from module. + return getattr(module, class_name)(**configs) + else: + # Expect makeExtension() function to return a class. + try: + return module.makeExtension(**configs) + except AttributeError as e: + message = e.args[0] + message = "Failed to initiate extension " \ + "'%s': %s" % (ext_name, message) + e.args = (message,) + e.args[1:] + raise + + def registerExtension(self, extension): + """ This gets called by the extension """ + self.registeredExtensions.append(extension) + return self + + def reset(self): + """ + Resets all state variables so that we can start with a new text. + """ + self.htmlStash.reset() + self.references.clear() + + for extension in self.registeredExtensions: + if hasattr(extension, 'reset'): + extension.reset() + + return self + + def set_output_format(self, format): + """ Set the output format for the class instance. """ + self.output_format = format.lower().rstrip('145') # ignore num + try: + self.serializer = self.output_formats[self.output_format] + except KeyError as e: + valid_formats = list(self.output_formats.keys()) + valid_formats.sort() + message = 'Invalid Output Format: "%s". Use one of %s.' \ + % (self.output_format, + '"' + '", "'.join(valid_formats) + '"') + e.args = (message,) + e.args[1:] + raise + return self + + def is_block_level(self, tag): + """Check if the tag is a block level HTML tag.""" + if isinstance(tag, str): + return tag.lower().rstrip('/') in self.block_level_elements + # Some ElementTree tags are not strings, so return False. + return False + + def convert(self, source): + """ + Convert markdown to serialized XHTML or HTML. + + Keyword arguments: + + * source: Source text as a Unicode string. + + Markdown processing takes place in five steps: + + 1. A bunch of "preprocessors" munge the input text. + 2. BlockParser() parses the high-level structural elements of the + pre-processed text into an ElementTree. + 3. A bunch of "treeprocessors" are run against the ElementTree. One + such treeprocessor runs InlinePatterns against the ElementTree, + detecting inline markup. + 4. Some post-processors are run against the text after the ElementTree + has been serialized into text. + 5. The output is written to a string. + + """ + + # Fixup the source text + if not source.strip(): + return '' # a blank unicode string + + try: + source = str(source) + except UnicodeDecodeError as e: # pragma: no cover + # Customise error message while maintaining original trackback + e.reason += '. -- Note: Markdown only accepts unicode input!' + raise + + # Split into lines and run the line preprocessors. + self.lines = source.split("\n") + for prep in self.preprocessors: + self.lines = prep.run(self.lines) + + # Parse the high-level elements. + root = self.parser.parseDocument(self.lines).getroot() + + # Run the tree-processors + for treeprocessor in self.treeprocessors: + newRoot = treeprocessor.run(root) + if newRoot is not None: + root = newRoot + + # Serialize _properly_. Strip top-level tags. + output = self.serializer(root) + if self.stripTopLevelTags: + try: + start = output.index( + '<%s>' % self.doc_tag) + len(self.doc_tag) + 2 + end = output.rindex('' % self.doc_tag) + output = output[start:end].strip() + except ValueError as e: # pragma: no cover + if output.strip().endswith('<%s />' % self.doc_tag): + # We have an empty document + output = '' + else: + # We have a serious problem + raise ValueError('Markdown failed to strip top-level ' + 'tags. Document=%r' % output.strip()) from e + + # Run the text post-processors + for pp in self.postprocessors: + output = pp.run(output) + + return output.strip() + + def convertFile(self, input=None, output=None, encoding=None): + """Converts a markdown file and returns the HTML as a unicode string. + + Decodes the file using the provided encoding (defaults to utf-8), + passes the file content to markdown, and outputs the html to either + the provided stream or the file with provided name, using the same + encoding as the source file. The 'xmlcharrefreplace' error handler is + used when encoding the output. + + **Note:** This is the only place that decoding and encoding of unicode + takes place in Python-Markdown. (All other code is unicode-in / + unicode-out.) + + Keyword arguments: + + * input: File object or path. Reads from stdin if `None`. + * output: File object or path. Writes to stdout if `None`. + * encoding: Encoding of input and output files. Defaults to utf-8. + + """ + + encoding = encoding or "utf-8" + + # Read the source + if input: + if isinstance(input, str): + input_file = codecs.open(input, mode="r", encoding=encoding) + else: + input_file = codecs.getreader(encoding)(input) + text = input_file.read() + input_file.close() + else: + text = sys.stdin.read() + if not isinstance(text, str): # pragma: no cover + text = text.decode(encoding) + + text = text.lstrip('\ufeff') # remove the byte-order mark + + # Convert + html = self.convert(text) + + # Write to file or stdout + if output: + if isinstance(output, str): + output_file = codecs.open(output, "w", + encoding=encoding, + errors="xmlcharrefreplace") + output_file.write(html) + output_file.close() + else: + writer = codecs.getwriter(encoding) + output_file = writer(output, errors="xmlcharrefreplace") + output_file.write(html) + # Don't close here. User may want to write more. + else: + # Encode manually and write bytes to stdout. + html = html.encode(encoding, "xmlcharrefreplace") + try: + # Write bytes directly to buffer (Python 3). + sys.stdout.buffer.write(html) + except AttributeError: # pragma: no cover + # Probably Python 2, which works with bytes by default. + sys.stdout.write(html) + + return self + + +""" +EXPORTED FUNCTIONS +============================================================================= + +Those are the two functions we really mean to export: markdown() and +markdownFromFile(). +""" + + +def markdown(text, **kwargs): + """Convert a markdown string to HTML and return HTML as a unicode string. + + This is a shortcut function for `Markdown` class to cover the most + basic use case. It initializes an instance of Markdown, loads the + necessary extensions and runs the parser on the given text. + + Keyword arguments: + + * text: Markdown formatted text as Unicode or ASCII string. + * Any arguments accepted by the Markdown class. + + Returns: An HTML document as a string. + + """ + md = Markdown(**kwargs) + return md.convert(text) + + +def markdownFromFile(**kwargs): + """Read markdown code from a file and write it to a file or a stream. + + This is a shortcut function which initializes an instance of Markdown, + and calls the convertFile method rather than convert. + + Keyword arguments: + + * input: a file name or readable object. + * output: a file name or writable object. + * encoding: Encoding of input and output. + * Any arguments accepted by the Markdown class. + + """ + md = Markdown(**kwargs) + md.convertFile(kwargs.get('input', None), + kwargs.get('output', None), + kwargs.get('encoding', None)) diff --git a/Libs/markdown/extensions/__init__.py b/Libs/markdown/extensions/__init__.py new file mode 100644 index 0000000..4712e25 --- /dev/null +++ b/Libs/markdown/extensions/__init__.py @@ -0,0 +1,107 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import warnings +from ..util import parseBoolValue + + +class Extension: + """ Base class for extensions to subclass. """ + + # Default config -- to be overridden by a subclass + # Must be of the following format: + # { + # 'key': ['value', 'description'] + # } + # Note that Extension.setConfig will raise a KeyError + # if a default is not set here. + config = {} + + def __init__(self, **kwargs): + """ Initiate Extension and set up configs. """ + self.setConfigs(kwargs) + + def getConfig(self, key, default=''): + """ Return a setting for the given key or an empty string. """ + if key in self.config: + return self.config[key][0] + else: + return default + + def getConfigs(self): + """ Return all configs settings as a dict. """ + return {key: self.getConfig(key) for key in self.config.keys()} + + def getConfigInfo(self): + """ Return all config descriptions as a list of tuples. """ + return [(key, self.config[key][1]) for key in self.config.keys()] + + def setConfig(self, key, value): + """ Set a config setting for `key` with the given `value`. """ + if isinstance(self.config[key][0], bool): + value = parseBoolValue(value) + if self.config[key][0] is None: + value = parseBoolValue(value, preserve_none=True) + self.config[key][0] = value + + def setConfigs(self, items): + """ Set multiple config settings given a dict or list of tuples. """ + if hasattr(items, 'items'): + # it's a dict + items = items.items() + for key, value in items: + self.setConfig(key, value) + + def _extendMarkdown(self, *args): + """ Private wrapper around extendMarkdown. """ + md = args[0] + try: + self.extendMarkdown(md) + except TypeError as e: + if "missing 1 required positional argument" in str(e): + # Must be a 2.x extension. Pass in a dumby md_globals. + self.extendMarkdown(md, {}) + warnings.warn( + "The 'md_globals' parameter of '{}.{}.extendMarkdown' is " + "deprecated.".format(self.__class__.__module__, self.__class__.__name__), + category=DeprecationWarning, + stacklevel=2 + ) + else: + raise + + def extendMarkdown(self, md): + """ + Add the various processors and patterns to the Markdown Instance. + + This method must be overridden by every extension. + + Keyword arguments: + + * md: The Markdown instance. + + * md_globals: Global variables in the markdown module namespace. + + """ + raise NotImplementedError( + 'Extension "%s.%s" must define an "extendMarkdown"' + 'method.' % (self.__class__.__module__, self.__class__.__name__) + ) diff --git a/Libs/markdown/extensions/__pycache__/__init__.cpython-310.pyc b/Libs/markdown/extensions/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..00d7ee9 Binary files /dev/null and b/Libs/markdown/extensions/__pycache__/__init__.cpython-310.pyc differ diff --git a/Libs/markdown/extensions/abbr.py b/Libs/markdown/extensions/abbr.py new file mode 100644 index 0000000..9879314 --- /dev/null +++ b/Libs/markdown/extensions/abbr.py @@ -0,0 +1,99 @@ +''' +Abbreviation Extension for Python-Markdown +========================================== + +This extension adds abbreviation handling to Python-Markdown. + +See +for documentation. + +Oringinal code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/) and + [Seemant Kulleen](http://www.kulleen.org/) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +''' + +from . import Extension +from ..blockprocessors import BlockProcessor +from ..inlinepatterns import InlineProcessor +from ..util import AtomicString +import re +import xml.etree.ElementTree as etree + + +class AbbrExtension(Extension): + """ Abbreviation Extension for Python-Markdown. """ + + def extendMarkdown(self, md): + """ Insert AbbrPreprocessor before ReferencePreprocessor. """ + md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16) + + +class AbbrPreprocessor(BlockProcessor): + """ Abbreviation Preprocessor - parse text for abbr references. """ + + RE = re.compile(r'^[*]\[(?P[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P.*)$', re.MULTILINE) + + def test(self, parent, block): + return True + + def run(self, parent, blocks): + ''' + Find and remove all Abbreviation references from the text. + Each reference is set as a new AbbrPattern in the markdown instance. + + ''' + block = blocks.pop(0) + m = self.RE.search(block) + if m: + abbr = m.group('abbr').strip() + title = m.group('title').strip() + self.parser.md.inlinePatterns.register( + AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2 + ) + if block[m.end():].strip(): + # Add any content after match back to blocks as separate block + blocks.insert(0, block[m.end():].lstrip('\n')) + if block[:m.start()].strip(): + # Add any content before match back to blocks as separate block + blocks.insert(0, block[:m.start()].rstrip('\n')) + return True + # No match. Restore block. + blocks.insert(0, block) + return False + + def _generate_pattern(self, text): + ''' + Given a string, returns an regex pattern to match that string. + + 'HTML' -> r'(?P<abbr>[H][T][M][L])' + + Note: we force each char as a literal match (in brackets) as we don't + know what they will be beforehand. + + ''' + chars = list(text) + for i in range(len(chars)): + chars[i] = r'[%s]' % chars[i] + return r'(?P<abbr>\b%s\b)' % (r''.join(chars)) + + +class AbbrInlineProcessor(InlineProcessor): + """ Abbreviation inline pattern. """ + + def __init__(self, pattern, title): + super().__init__(pattern) + self.title = title + + def handleMatch(self, m, data): + abbr = etree.Element('abbr') + abbr.text = AtomicString(m.group('abbr')) + abbr.set('title', self.title) + return abbr, m.start(0), m.end(0) + + +def makeExtension(**kwargs): # pragma: no cover + return AbbrExtension(**kwargs) diff --git a/Libs/markdown/extensions/admonition.py b/Libs/markdown/extensions/admonition.py new file mode 100644 index 0000000..cb8d901 --- /dev/null +++ b/Libs/markdown/extensions/admonition.py @@ -0,0 +1,170 @@ +""" +Admonition extension for Python-Markdown +======================================== + +Adds rST-style admonitions. Inspired by [rST][] feature with the same name. + +[rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions # noqa + +See <https://Python-Markdown.github.io/extensions/admonition> +for documentation. + +Original code Copyright [Tiago Serafim](https://www.tiagoserafim.com/). + +All changes Copyright The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..blockprocessors import BlockProcessor +import xml.etree.ElementTree as etree +import re + + +class AdmonitionExtension(Extension): + """ Admonition extension for Python-Markdown. """ + + def extendMarkdown(self, md): + """ Add Admonition to Markdown instance. """ + md.registerExtension(self) + + md.parser.blockprocessors.register(AdmonitionProcessor(md.parser), 'admonition', 105) + + +class AdmonitionProcessor(BlockProcessor): + + CLASSNAME = 'admonition' + CLASSNAME_TITLE = 'admonition-title' + RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)') + RE_SPACES = re.compile(' +') + + def __init__(self, parser): + """Initialization.""" + + super().__init__(parser) + + self.current_sibling = None + self.content_indention = 0 + + def parse_content(self, parent, block): + """Get sibling admonition. + + Retrieve the appropriate sibling element. This can get tricky when + dealing with lists. + + """ + + old_block = block + the_rest = '' + + # We already acquired the block via test + if self.current_sibling is not None: + sibling = self.current_sibling + block, the_rest = self.detab(block, self.content_indent) + self.current_sibling = None + self.content_indent = 0 + return sibling, block, the_rest + + sibling = self.lastChild(parent) + + if sibling is None or sibling.get('class', '').find(self.CLASSNAME) == -1: + sibling = None + else: + # If the last child is a list and the content is sufficiently indented + # to be under it, then the content's sibling is in the list. + last_child = self.lastChild(sibling) + indent = 0 + while last_child: + if ( + sibling and block.startswith(' ' * self.tab_length * 2) and + last_child and last_child.tag in ('ul', 'ol', 'dl') + ): + + # The expectation is that we'll find an <li> or <dt>. + # We should get its last child as well. + sibling = self.lastChild(last_child) + last_child = self.lastChild(sibling) if sibling else None + + # Context has been lost at this point, so we must adjust the + # text's indentation level so it will be evaluated correctly + # under the list. + block = block[self.tab_length:] + indent += self.tab_length + else: + last_child = None + + if not block.startswith(' ' * self.tab_length): + sibling = None + + if sibling is not None: + indent += self.tab_length + block, the_rest = self.detab(old_block, indent) + self.current_sibling = sibling + self.content_indent = indent + + return sibling, block, the_rest + + def test(self, parent, block): + + if self.RE.search(block): + return True + else: + return self.parse_content(parent, block)[0] is not None + + def run(self, parent, blocks): + block = blocks.pop(0) + m = self.RE.search(block) + + if m: + if m.start() > 0: + self.parser.parseBlocks(parent, [block[:m.start()]]) + block = block[m.end():] # removes the first line + block, theRest = self.detab(block) + else: + sibling, block, theRest = self.parse_content(parent, block) + + if m: + klass, title = self.get_class_and_title(m) + div = etree.SubElement(parent, 'div') + div.set('class', '{} {}'.format(self.CLASSNAME, klass)) + if title: + p = etree.SubElement(div, 'p') + p.text = title + p.set('class', self.CLASSNAME_TITLE) + else: + # Sibling is a list item, but we need to wrap it's content should be wrapped in <p> + if sibling.tag in ('li', 'dd') and sibling.text: + text = sibling.text + sibling.text = '' + p = etree.SubElement(sibling, 'p') + p.text = text + + div = sibling + + self.parser.parseChunk(div, block) + + if theRest: + # This block contained unindented line(s) after the first indented + # line. Insert these lines as the first block of the master blocks + # list for future processing. + blocks.insert(0, theRest) + + def get_class_and_title(self, match): + klass, title = match.group(1).lower(), match.group(2) + klass = self.RE_SPACES.sub(' ', klass) + if title is None: + # no title was provided, use the capitalized classname as title + # e.g.: `!!! note` will render + # `<p class="admonition-title">Note</p>` + title = klass.split(' ', 1)[0].capitalize() + elif title == '': + # an explicit blank title should not be rendered + # e.g.: `!!! warning ""` will *not* render `p` with a title + title = None + return klass, title + + +def makeExtension(**kwargs): # pragma: no cover + return AdmonitionExtension(**kwargs) diff --git a/Libs/markdown/extensions/attr_list.py b/Libs/markdown/extensions/attr_list.py new file mode 100644 index 0000000..9a67551 --- /dev/null +++ b/Libs/markdown/extensions/attr_list.py @@ -0,0 +1,166 @@ +""" +Attribute List Extension for Python-Markdown +============================================ + +Adds attribute list syntax. Inspired by +[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s +feature of the same name. + +See <https://Python-Markdown.github.io/extensions/attr_list> +for documentation. + +Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/). + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..treeprocessors import Treeprocessor +import re + + +def _handle_double_quote(s, t): + k, v = t.split('=', 1) + return k, v.strip('"') + + +def _handle_single_quote(s, t): + k, v = t.split('=', 1) + return k, v.strip("'") + + +def _handle_key_value(s, t): + return t.split('=', 1) + + +def _handle_word(s, t): + if t.startswith('.'): + return '.', t[1:] + if t.startswith('#'): + return 'id', t[1:] + return t, t + + +_scanner = re.Scanner([ + (r'[^ =]+=".*?"', _handle_double_quote), + (r"[^ =]+='.*?'", _handle_single_quote), + (r'[^ =]+=[^ =]+', _handle_key_value), + (r'[^ =]+', _handle_word), + (r' ', None) +]) + + +def get_attrs(str): + """ Parse attribute list and return a list of attribute tuples. """ + return _scanner.scan(str)[0] + + +def isheader(elem): + return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] + + +class AttrListTreeprocessor(Treeprocessor): + + BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}' + HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE)) + BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE)) + INLINE_RE = re.compile(r'^{}'.format(BASE_RE)) + NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff' + r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d' + r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff' + r'\uf900-\ufdcf\ufdf0-\ufffd' + r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') + + def run(self, doc): + for elem in doc.iter(): + if self.md.is_block_level(elem.tag): + # Block level: check for attrs on last line of text + RE = self.BLOCK_RE + if isheader(elem) or elem.tag in ['dt', 'td', 'th']: + # header, def-term, or table cell: check for attrs at end of element + RE = self.HEADER_RE + if len(elem) and elem.tag == 'li': + # special case list items. children may include a ul or ol. + pos = None + # find the ul or ol position + for i, child in enumerate(elem): + if child.tag in ['ul', 'ol']: + pos = i + break + if pos is None and elem[-1].tail: + # use tail of last child. no ul or ol. + m = RE.search(elem[-1].tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem[-1].tail = elem[-1].tail[:m.start()] + elif pos is not None and pos > 0 and elem[pos-1].tail: + # use tail of last child before ul or ol + m = RE.search(elem[pos-1].tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem[pos-1].tail = elem[pos-1].tail[:m.start()] + elif elem.text: + # use text. ul is first child. + m = RE.search(elem.text) + if m: + self.assign_attrs(elem, m.group(1)) + elem.text = elem.text[:m.start()] + elif len(elem) and elem[-1].tail: + # has children. Get from tail of last child + m = RE.search(elem[-1].tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem[-1].tail = elem[-1].tail[:m.start()] + if isheader(elem): + # clean up trailing #s + elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() + elif elem.text: + # no children. Get from text. + m = RE.search(elem.text) + if m: + self.assign_attrs(elem, m.group(1)) + elem.text = elem.text[:m.start()] + if isheader(elem): + # clean up trailing #s + elem.text = elem.text.rstrip('#').rstrip() + else: + # inline: check for attrs at start of tail + if elem.tail: + m = self.INLINE_RE.match(elem.tail) + if m: + self.assign_attrs(elem, m.group(1)) + elem.tail = elem.tail[m.end():] + + def assign_attrs(self, elem, attrs): + """ Assign attrs to element. """ + for k, v in get_attrs(attrs): + if k == '.': + # add to class + cls = elem.get('class') + if cls: + elem.set('class', '{} {}'.format(cls, v)) + else: + elem.set('class', v) + else: + # assign attr k with v + elem.set(self.sanitize_name(k), v) + + def sanitize_name(self, name): + """ + Sanitize name as 'an XML Name, minus the ":"'. + See https://www.w3.org/TR/REC-xml-names/#NT-NCName + """ + return self.NAME_RE.sub('_', name) + + +class AttrListExtension(Extension): + def extendMarkdown(self, md): + md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8) + md.registerExtension(self) + + +def makeExtension(**kwargs): # pragma: no cover + return AttrListExtension(**kwargs) diff --git a/Libs/markdown/extensions/codehilite.py b/Libs/markdown/extensions/codehilite.py new file mode 100644 index 0000000..a768b73 --- /dev/null +++ b/Libs/markdown/extensions/codehilite.py @@ -0,0 +1,308 @@ +""" +CodeHilite Extension for Python-Markdown +======================================== + +Adds code/syntax highlighting to standard Python-Markdown code blocks. + +See <https://Python-Markdown.github.io/extensions/code_hilite> +for documentation. + +Original code Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/). + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..treeprocessors import Treeprocessor +from ..util import parseBoolValue + +try: # pragma: no cover + from pygments import highlight + from pygments.lexers import get_lexer_by_name, guess_lexer + from pygments.formatters import get_formatter_by_name + pygments = True +except ImportError: # pragma: no cover + pygments = False + + +def parse_hl_lines(expr): + """Support our syntax for emphasizing certain lines of code. + + expr should be like '1 2' to emphasize lines 1 and 2 of a code block. + Returns a list of ints, the line numbers to emphasize. + """ + if not expr: + return [] + + try: + return list(map(int, expr.split())) + except ValueError: # pragma: no cover + return [] + + +# ------------------ The Main CodeHilite Class ---------------------- +class CodeHilite: + """ + Determine language of source code, and pass it on to the Pygments highlighter. + + Usage: + code = CodeHilite(src=some_code, lang='python') + html = code.hilite() + + Arguments: + * src: Source string or any object with a .readline attribute. + + * lang: String name of Pygments lexer to use for highlighting. Default: `None`. + + * guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid + value. Default: `True`. + + * use_pygments: Pass code to pygments for code highlighting. If `False`, the code is + instead wrapped for highlighting by a JavaScript library. Default: `True`. + + * linenums: An alias to Pygments `linenos` formatter option. Default: `None`. + + * css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'. + + * lang_prefix: Prefix prepended to the language when `use_pygments` is `False`. + Default: "language-". + + Other Options: + Any other options are accepted and passed on to the lexer and formatter. Therefore, + valid options include any options which are accepted by the `html` formatter or + whichever lexer the code's language uses. Note that most lexers do not have any + options. However, a few have very useful options, such as PHP's `startinline` option. + Any invalid options are ignored without error. + + Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter + Lexer Options: https://pygments.org/docs/lexers/ + + Advanced Usage: + code = CodeHilite( + src = some_code, + lang = 'php', + startinline = True, # Lexer option. Snippet does not start with `<?php`. + linenostart = 42, # Formatter option. Snippet starts on line 42. + hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50. + linenos = 'inline' # Formatter option. Avoid alignment problems. + ) + html = code.hilite() + + """ + + def __init__(self, src, **options): + self.src = src + self.lang = options.pop('lang', None) + self.guess_lang = options.pop('guess_lang', True) + self.use_pygments = options.pop('use_pygments', True) + self.lang_prefix = options.pop('lang_prefix', 'language-') + + if 'linenos' not in options: + options['linenos'] = options.pop('linenums', None) + if 'cssclass' not in options: + options['cssclass'] = options.pop('css_class', 'codehilite') + if 'wrapcode' not in options: + # Override pygments default + options['wrapcode'] = True + # Disallow use of `full` option + options['full'] = False + + self.options = options + + def hilite(self, shebang=True): + """ + Pass code to the [Pygments](http://pygments.pocoo.org/) highliter with + optional line numbers. The output should then be styled with css to + your liking. No styles are applied by default - only styling hooks + (i.e.: <span class="k">). + + returns : A string of html. + + """ + + self.src = self.src.strip('\n') + + if self.lang is None and shebang: + self._parseHeader() + + if pygments and self.use_pygments: + try: + lexer = get_lexer_by_name(self.lang, **self.options) + except ValueError: + try: + if self.guess_lang: + lexer = guess_lexer(self.src, **self.options) + else: + lexer = get_lexer_by_name('text', **self.options) + except ValueError: # pragma: no cover + lexer = get_lexer_by_name('text', **self.options) + formatter = get_formatter_by_name('html', **self.options) + return highlight(self.src, lexer, formatter) + else: + # just escape and build markup usable by JS highlighting libs + txt = self.src.replace('&', '&') + txt = txt.replace('<', '<') + txt = txt.replace('>', '>') + txt = txt.replace('"', '"') + classes = [] + if self.lang: + classes.append('{}{}'.format(self.lang_prefix, self.lang)) + if self.options['linenos']: + classes.append('linenums') + class_str = '' + if classes: + class_str = ' class="{}"'.format(' '.join(classes)) + return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format( + self.options['cssclass'], + class_str, + txt + ) + + def _parseHeader(self): + """ + Determines language of a code block from shebang line and whether the + said line should be removed or left in place. If the sheband line + contains a path (even a single /) then it is assumed to be a real + shebang line and left alone. However, if no path is given + (e.i.: #!python or :::python) then it is assumed to be a mock shebang + for language identification of a code fragment and removed from the + code block prior to processing for code highlighting. When a mock + shebang (e.i: #!python) is found, line numbering is turned on. When + colons are found in place of a shebang (e.i.: :::python), line + numbering is left in the current state - off by default. + + Also parses optional list of highlight lines, like: + + :::python hl_lines="1 3" + """ + + import re + + # split text into lines + lines = self.src.split("\n") + # pull first line to examine + fl = lines.pop(0) + + c = re.compile(r''' + (?:(?:^::+)|(?P<shebang>^[#]!)) # Shebang or 2 or more colons + (?P<path>(?:/\w+)*[/ ])? # Zero or 1 path + (?P<lang>[\w#.+-]*) # The language + \s* # Arbitrary whitespace + # Optional highlight lines, single- or double-quote-delimited + (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))? + ''', re.VERBOSE) + # search first line for shebang + m = c.search(fl) + if m: + # we have a match + try: + self.lang = m.group('lang').lower() + except IndexError: # pragma: no cover + self.lang = None + if m.group('path'): + # path exists - restore first line + lines.insert(0, fl) + if self.options['linenos'] is None and m.group('shebang'): + # Overridable and Shebang exists - use line numbers + self.options['linenos'] = True + + self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines')) + else: + # No match + lines.insert(0, fl) + + self.src = "\n".join(lines).strip("\n") + + +# ------------------ The Markdown Extension ------------------------------- + + +class HiliteTreeprocessor(Treeprocessor): + """ Highlight source code in code blocks. """ + + def code_unescape(self, text): + """Unescape code.""" + text = text.replace("<", "<") + text = text.replace(">", ">") + # Escaped '&' should be replaced at the end to avoid + # conflicting with < and >. + text = text.replace("&", "&") + return text + + def run(self, root): + """ Find code blocks and store in htmlStash. """ + blocks = root.iter('pre') + for block in blocks: + if len(block) == 1 and block[0].tag == 'code': + local_config = self.config.copy() + code = CodeHilite( + self.code_unescape(block[0].text), + tab_length=self.md.tab_length, + style=local_config.pop('pygments_style', 'default'), + **local_config + ) + placeholder = self.md.htmlStash.store(code.hilite()) + # Clear codeblock in etree instance + block.clear() + # Change to p element which will later + # be removed when inserting raw html + block.tag = 'p' + block.text = placeholder + + +class CodeHiliteExtension(Extension): + """ Add source code highlighting to markdown codeblocks. """ + + def __init__(self, **kwargs): + # define default configs + self.config = { + 'linenums': [None, + "Use lines numbers. True|table|inline=yes, False=no, None=auto"], + 'guess_lang': [True, + "Automatic language detection - Default: True"], + 'css_class': ["codehilite", + "Set class name for wrapper <div> - " + "Default: codehilite"], + 'pygments_style': ['default', + 'Pygments HTML Formatter Style ' + '(Colorscheme) - Default: default'], + 'noclasses': [False, + 'Use inline styles instead of CSS classes - ' + 'Default false'], + 'use_pygments': [True, + 'Use Pygments to Highlight code blocks. ' + 'Disable if using a JavaScript library. ' + 'Default: True'], + 'lang_prefix': [ + 'language-', + 'Prefix prepended to the language when use_pygments is false. Default: "language-"' + ] + } + + for key, value in kwargs.items(): + if key in self.config: + self.setConfig(key, value) + else: + # manually set unknown keywords. + if isinstance(value, str): + try: + # Attempt to parse str as a bool value + value = parseBoolValue(value, preserve_none=True) + except ValueError: + pass # Assume it's not a bool value. Use as-is. + self.config[key] = [value, ''] + + def extendMarkdown(self, md): + """ Add HilitePostprocessor to Markdown instance. """ + hiliter = HiliteTreeprocessor(md) + hiliter.config = self.getConfigs() + md.treeprocessors.register(hiliter, 'hilite', 30) + + md.registerExtension(self) + + +def makeExtension(**kwargs): # pragma: no cover + return CodeHiliteExtension(**kwargs) diff --git a/Libs/markdown/extensions/def_list.py b/Libs/markdown/extensions/def_list.py new file mode 100644 index 0000000..17549f0 --- /dev/null +++ b/Libs/markdown/extensions/def_list.py @@ -0,0 +1,111 @@ +""" +Definition List Extension for Python-Markdown +============================================= + +Adds parsing of Definition Lists to Python-Markdown. + +See <https://Python-Markdown.github.io/extensions/definition_lists> +for documentation. + +Original code Copyright 2008 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..blockprocessors import BlockProcessor, ListIndentProcessor +import xml.etree.ElementTree as etree +import re + + +class DefListProcessor(BlockProcessor): + """ Process Definition Lists. """ + + RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)') + NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]') + + def test(self, parent, block): + return bool(self.RE.search(block)) + + def run(self, parent, blocks): + + raw_block = blocks.pop(0) + m = self.RE.search(raw_block) + terms = [term.strip() for term in + raw_block[:m.start()].split('\n') if term.strip()] + block = raw_block[m.end():] + no_indent = self.NO_INDENT_RE.match(block) + if no_indent: + d, theRest = (block, None) + else: + d, theRest = self.detab(block) + if d: + d = '{}\n{}'.format(m.group(2), d) + else: + d = m.group(2) + sibling = self.lastChild(parent) + if not terms and sibling is None: + # This is not a definition item. Most likely a paragraph that + # starts with a colon at the beginning of a document or list. + blocks.insert(0, raw_block) + return False + if not terms and sibling.tag == 'p': + # The previous paragraph contains the terms + state = 'looselist' + terms = sibling.text.split('\n') + parent.remove(sibling) + # Acquire new sibling + sibling = self.lastChild(parent) + else: + state = 'list' + + if sibling is not None and sibling.tag == 'dl': + # This is another item on an existing list + dl = sibling + if not terms and len(dl) and dl[-1].tag == 'dd' and len(dl[-1]): + state = 'looselist' + else: + # This is a new list + dl = etree.SubElement(parent, 'dl') + # Add terms + for term in terms: + dt = etree.SubElement(dl, 'dt') + dt.text = term + # Add definition + self.parser.state.set(state) + dd = etree.SubElement(dl, 'dd') + self.parser.parseBlocks(dd, [d]) + self.parser.state.reset() + + if theRest: + blocks.insert(0, theRest) + + +class DefListIndentProcessor(ListIndentProcessor): + """ Process indented children of definition list items. """ + + # Definition lists need to be aware of all list types + ITEM_TYPES = ['dd', 'li'] + LIST_TYPES = ['dl', 'ol', 'ul'] + + def create_item(self, parent, block): + """ Create a new dd or li (depending on parent) and parse the block with it as the parent. """ + + dd = etree.SubElement(parent, 'dd') + self.parser.parseBlocks(dd, [block]) + + +class DefListExtension(Extension): + """ Add definition lists to Markdown. """ + + def extendMarkdown(self, md): + """ Add an instance of DefListProcessor to BlockParser. """ + md.parser.blockprocessors.register(DefListIndentProcessor(md.parser), 'defindent', 85) + md.parser.blockprocessors.register(DefListProcessor(md.parser), 'deflist', 25) + + +def makeExtension(**kwargs): # pragma: no cover + return DefListExtension(**kwargs) diff --git a/Libs/markdown/extensions/extra.py b/Libs/markdown/extensions/extra.py new file mode 100644 index 0000000..909ba07 --- /dev/null +++ b/Libs/markdown/extensions/extra.py @@ -0,0 +1,58 @@ +""" +Python-Markdown Extra Extension +=============================== + +A compilation of various Python-Markdown extensions that imitates +[PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/). + +Note that each of the individual extensions still need to be available +on your PYTHONPATH. This extension simply wraps them all up as a +convenience so that only one extension needs to be listed when +initiating Markdown. See the documentation for each individual +extension for specifics about that extension. + +There may be additional extensions that are distributed with +Python-Markdown that are not included here in Extra. Those extensions +are not part of PHP Markdown Extra, and therefore, not part of +Python-Markdown Extra. If you really would like Extra to include +additional extensions, we suggest creating your own clone of Extra +under a different name. You could also edit the `extensions` global +variable defined below, but be aware that such changes may be lost +when you upgrade to any future version of Python-Markdown. + +See <https://Python-Markdown.github.io/extensions/extra> +for documentation. + +Copyright The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension + +extensions = [ + 'fenced_code', + 'footnotes', + 'attr_list', + 'def_list', + 'tables', + 'abbr', + 'md_in_html' +] + + +class ExtraExtension(Extension): + """ Add various extensions to Markdown class.""" + + def __init__(self, **kwargs): + """ config is a dumb holder which gets passed to actual ext later. """ + self.config = kwargs + + def extendMarkdown(self, md): + """ Register extension instances. """ + md.registerExtensions(extensions, self.config) + + +def makeExtension(**kwargs): # pragma: no cover + return ExtraExtension(**kwargs) diff --git a/Libs/markdown/extensions/fenced_code.py b/Libs/markdown/extensions/fenced_code.py new file mode 100644 index 0000000..409166a --- /dev/null +++ b/Libs/markdown/extensions/fenced_code.py @@ -0,0 +1,174 @@ +""" +Fenced Code Extension for Python Markdown +========================================= + +This extension adds Fenced Code Blocks to Python-Markdown. + +See <https://Python-Markdown.github.io/extensions/fenced_code_blocks> +for documentation. + +Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/). + + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) +""" + + +from textwrap import dedent +from . import Extension +from ..preprocessors import Preprocessor +from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines +from .attr_list import get_attrs, AttrListExtension +from ..util import parseBoolValue +from ..serializers import _escape_attrib_html +import re + + +class FencedCodeExtension(Extension): + def __init__(self, **kwargs): + self.config = { + 'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"'] + } + super().__init__(**kwargs) + + def extendMarkdown(self, md): + """ Add FencedBlockPreprocessor to the Markdown instance. """ + md.registerExtension(self) + + md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25) + + +class FencedBlockPreprocessor(Preprocessor): + FENCED_BLOCK_RE = re.compile( + dedent(r''' + (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence + ((\{(?P<attrs>[^\}\n]*)\})| # (optional {attrs} or + (\.?(?P<lang>[\w#.+-]*)[ ]*)? # optional (.)lang + (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines) + \n # newline (end of opening fence) + (?P<code>.*?)(?<=\n) # the code block + (?P=fence)[ ]*$ # closing fence + '''), + re.MULTILINE | re.DOTALL | re.VERBOSE + ) + + def __init__(self, md, config): + super().__init__(md) + self.config = config + self.checked_for_deps = False + self.codehilite_conf = {} + self.use_attr_list = False + # List of options to convert to bool values + self.bool_options = [ + 'linenums', + 'guess_lang', + 'noclasses', + 'use_pygments' + ] + + def run(self, lines): + """ Match and store Fenced Code Blocks in the HtmlStash. """ + + # Check for dependent extensions + if not self.checked_for_deps: + for ext in self.md.registeredExtensions: + if isinstance(ext, CodeHiliteExtension): + self.codehilite_conf = ext.getConfigs() + if isinstance(ext, AttrListExtension): + self.use_attr_list = True + + self.checked_for_deps = True + + text = "\n".join(lines) + while 1: + m = self.FENCED_BLOCK_RE.search(text) + if m: + lang, id, classes, config = None, '', [], {} + if m.group('attrs'): + id, classes, config = self.handle_attrs(get_attrs(m.group('attrs'))) + if len(classes): + lang = classes.pop(0) + else: + if m.group('lang'): + lang = m.group('lang') + if m.group('hl_lines'): + # Support hl_lines outside of attrs for backward-compatibility + config['hl_lines'] = parse_hl_lines(m.group('hl_lines')) + + # If config is not empty, then the codehighlite extension + # is enabled, so we call it to highlight the code + if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True): + local_config = self.codehilite_conf.copy() + local_config.update(config) + # Combine classes with cssclass. Ensure cssclass is at end + # as pygments appends a suffix under certain circumstances. + # Ignore ID as Pygments does not offer an option to set it. + if classes: + local_config['css_class'] = '{} {}'.format( + ' '.join(classes), + local_config['css_class'] + ) + highliter = CodeHilite( + m.group('code'), + lang=lang, + style=local_config.pop('pygments_style', 'default'), + **local_config + ) + + code = highliter.hilite(shebang=False) + else: + id_attr = lang_attr = class_attr = kv_pairs = '' + if lang: + prefix = self.config.get('lang_prefix', 'language-') + lang_attr = f' class="{prefix}{_escape_attrib_html(lang)}"' + if classes: + class_attr = f' class="{_escape_attrib_html(" ".join(classes))}"' + if id: + id_attr = f' id="{_escape_attrib_html(id)}"' + if self.use_attr_list and config and not config.get('use_pygments', False): + # Only assign key/value pairs to code element if attr_list ext is enabled, key/value pairs + # were defined on the code block, and the `use_pygments` key was not set to True. The + # `use_pygments` key could be either set to False or not defined. It is omitted from output. + kv_pairs = ''.join( + f' {k}="{_escape_attrib_html(v)}"' for k, v in config.items() if k != 'use_pygments' + ) + code = self._escape(m.group('code')) + code = f'<pre{id_attr}{class_attr}><code{lang_attr}{kv_pairs}>{code}</code></pre>' + + placeholder = self.md.htmlStash.store(code) + text = f'{text[:m.start()]}\n{placeholder}\n{text[m.end():]}' + else: + break + return text.split("\n") + + def handle_attrs(self, attrs): + """ Return tuple: (id, [list, of, classes], {configs}) """ + id = '' + classes = [] + configs = {} + for k, v in attrs: + if k == 'id': + id = v + elif k == '.': + classes.append(v) + elif k == 'hl_lines': + configs[k] = parse_hl_lines(v) + elif k in self.bool_options: + configs[k] = parseBoolValue(v, fail_on_errors=False, preserve_none=True) + else: + configs[k] = v + return id, classes, configs + + def _escape(self, txt): + """ basic html escaping """ + txt = txt.replace('&', '&') + txt = txt.replace('<', '<') + txt = txt.replace('>', '>') + txt = txt.replace('"', '"') + return txt + + +def makeExtension(**kwargs): # pragma: no cover + return FencedCodeExtension(**kwargs) diff --git a/Libs/markdown/extensions/footnotes.py b/Libs/markdown/extensions/footnotes.py new file mode 100644 index 0000000..1cc7118 --- /dev/null +++ b/Libs/markdown/extensions/footnotes.py @@ -0,0 +1,402 @@ +""" +Footnotes Extension for Python-Markdown +======================================= + +Adds footnote handling to Python-Markdown. + +See <https://Python-Markdown.github.io/extensions/footnotes> +for documentation. + +Copyright The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..blockprocessors import BlockProcessor +from ..inlinepatterns import InlineProcessor +from ..treeprocessors import Treeprocessor +from ..postprocessors import Postprocessor +from .. import util +from collections import OrderedDict +import re +import copy +import xml.etree.ElementTree as etree + +FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX +NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX +RE_REF_ID = re.compile(r'(fnref)(\d+)') + + +class FootnoteExtension(Extension): + """ Footnote Extension. """ + + def __init__(self, **kwargs): + """ Setup configs. """ + + self.config = { + 'PLACE_MARKER': + ["///Footnotes Go Here///", + "The text string that marks where the footnotes go"], + 'UNIQUE_IDS': + [False, + "Avoid name collisions across " + "multiple calls to reset()."], + "BACKLINK_TEXT": + ["↩", + "The text string that links from the footnote " + "to the reader's place."], + "BACKLINK_TITLE": + ["Jump back to footnote %d in the text", + "The text string used for the title HTML attribute " + "of the backlink. %d will be replaced by the " + "footnote number."], + "SEPARATOR": + [":", + "Footnote separator."] + } + super().__init__(**kwargs) + + # In multiple invocations, emit links that don't get tangled. + self.unique_prefix = 0 + self.found_refs = {} + self.used_refs = set() + + self.reset() + + def extendMarkdown(self, md): + """ Add pieces to Markdown. """ + md.registerExtension(self) + self.parser = md.parser + self.md = md + # Insert a blockprocessor before ReferencePreprocessor + md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17) + + # Insert an inline pattern before ImageReferencePattern + FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah + md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175) + # Insert a tree-processor that would actually add the footnote div + # This must be before all other treeprocessors (i.e., inline and + # codehilite) so they can run on the the contents of the div. + md.treeprocessors.register(FootnoteTreeprocessor(self), 'footnote', 50) + + # Insert a tree-processor that will run after inline is done. + # In this tree-processor we want to check our duplicate footnote tracker + # And add additional backrefs to the footnote pointing back to the + # duplicated references. + md.treeprocessors.register(FootnotePostTreeprocessor(self), 'footnote-duplicate', 15) + + # Insert a postprocessor after amp_substitute processor + md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25) + + def reset(self): + """ Clear footnotes on reset, and prepare for distinct document. """ + self.footnotes = OrderedDict() + self.unique_prefix += 1 + self.found_refs = {} + self.used_refs = set() + + def unique_ref(self, reference, found=False): + """ Get a unique reference if there are duplicates. """ + if not found: + return reference + + original_ref = reference + while reference in self.used_refs: + ref, rest = reference.split(self.get_separator(), 1) + m = RE_REF_ID.match(ref) + if m: + reference = '%s%d%s%s' % (m.group(1), int(m.group(2))+1, self.get_separator(), rest) + else: + reference = '%s%d%s%s' % (ref, 2, self.get_separator(), rest) + + self.used_refs.add(reference) + if original_ref in self.found_refs: + self.found_refs[original_ref] += 1 + else: + self.found_refs[original_ref] = 1 + return reference + + def findFootnotesPlaceholder(self, root): + """ Return ElementTree Element that contains Footnote placeholder. """ + def finder(element): + for child in element: + if child.text: + if child.text.find(self.getConfig("PLACE_MARKER")) > -1: + return child, element, True + if child.tail: + if child.tail.find(self.getConfig("PLACE_MARKER")) > -1: + return child, element, False + child_res = finder(child) + if child_res is not None: + return child_res + return None + + res = finder(root) + return res + + def setFootnote(self, id, text): + """ Store a footnote for later retrieval. """ + self.footnotes[id] = text + + def get_separator(self): + """ Get the footnote separator. """ + return self.getConfig("SEPARATOR") + + def makeFootnoteId(self, id): + """ Return footnote link id. """ + if self.getConfig("UNIQUE_IDS"): + return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id) + else: + return 'fn{}{}'.format(self.get_separator(), id) + + def makeFootnoteRefId(self, id, found=False): + """ Return footnote back-link id. """ + if self.getConfig("UNIQUE_IDS"): + return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found) + else: + return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found) + + def makeFootnotesDiv(self, root): + """ Return div of footnotes as et Element. """ + + if not list(self.footnotes.keys()): + return None + + div = etree.Element("div") + div.set('class', 'footnote') + etree.SubElement(div, "hr") + ol = etree.SubElement(div, "ol") + surrogate_parent = etree.Element("div") + + for index, id in enumerate(self.footnotes.keys(), start=1): + li = etree.SubElement(ol, "li") + li.set("id", self.makeFootnoteId(id)) + # Parse footnote with surrogate parent as li cannot be used. + # List block handlers have special logic to deal with li. + # When we are done parsing, we will copy everything over to li. + self.parser.parseChunk(surrogate_parent, self.footnotes[id]) + for el in list(surrogate_parent): + li.append(el) + surrogate_parent.remove(el) + backlink = etree.Element("a") + backlink.set("href", "#" + self.makeFootnoteRefId(id)) + backlink.set("class", "footnote-backref") + backlink.set( + "title", + self.getConfig("BACKLINK_TITLE") % (index) + ) + backlink.text = FN_BACKLINK_TEXT + + if len(li): + node = li[-1] + if node.tag == "p": + node.text = node.text + NBSP_PLACEHOLDER + node.append(backlink) + else: + p = etree.SubElement(li, "p") + p.append(backlink) + return div + + +class FootnoteBlockProcessor(BlockProcessor): + """ Find all footnote references and store for later use. """ + + RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE) + + def __init__(self, footnotes): + super().__init__(footnotes.parser) + self.footnotes = footnotes + + def test(self, parent, block): + return True + + def run(self, parent, blocks): + """ Find, set, and remove footnote definitions. """ + block = blocks.pop(0) + m = self.RE.search(block) + if m: + id = m.group(1) + fn_blocks = [m.group(2)] + + # Handle rest of block + therest = block[m.end():].lstrip('\n') + m2 = self.RE.search(therest) + if m2: + # Another footnote exists in the rest of this block. + # Any content before match is continuation of this footnote, which may be lazily indented. + before = therest[:m2.start()].rstrip('\n') + fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n') + # Add back to blocks everything from beginning of match forward for next iteration. + blocks.insert(0, therest[m2.start():]) + else: + # All remaining lines of block are continuation of this footnote, which may be lazily indented. + fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n') + + # Check for child elements in remaining blocks. + fn_blocks.extend(self.detectTabbed(blocks)) + + footnote = "\n\n".join(fn_blocks) + self.footnotes.setFootnote(id, footnote.rstrip()) + + if block[:m.start()].strip(): + # Add any content before match back to blocks as separate block + blocks.insert(0, block[:m.start()].rstrip('\n')) + return True + # No match. Restore block. + blocks.insert(0, block) + return False + + def detectTabbed(self, blocks): + """ Find indented text and remove indent before further proccesing. + + Returns: a list of blocks with indentation removed. + """ + fn_blocks = [] + while blocks: + if blocks[0].startswith(' '*4): + block = blocks.pop(0) + # Check for new footnotes within this block and split at new footnote. + m = self.RE.search(block) + if m: + # Another footnote exists in this block. + # Any content before match is continuation of this footnote, which may be lazily indented. + before = block[:m.start()].rstrip('\n') + fn_blocks.append(self.detab(before)) + # Add back to blocks everything from beginning of match forward for next iteration. + blocks.insert(0, block[m.start():]) + # End of this footnote. + break + else: + # Entire block is part of this footnote. + fn_blocks.append(self.detab(block)) + else: + # End of this footnote. + break + return fn_blocks + + def detab(self, block): + """ Remove one level of indent from a block. + + Preserve lazily indented blocks by only removing indent from indented lines. + """ + lines = block.split('\n') + for i, line in enumerate(lines): + if line.startswith(' '*4): + lines[i] = line[4:] + return '\n'.join(lines) + + +class FootnoteInlineProcessor(InlineProcessor): + """ InlinePattern for footnote markers in a document's body text. """ + + def __init__(self, pattern, footnotes): + super().__init__(pattern) + self.footnotes = footnotes + + def handleMatch(self, m, data): + id = m.group(1) + if id in self.footnotes.footnotes.keys(): + sup = etree.Element("sup") + a = etree.SubElement(sup, "a") + sup.set('id', self.footnotes.makeFootnoteRefId(id, found=True)) + a.set('href', '#' + self.footnotes.makeFootnoteId(id)) + a.set('class', 'footnote-ref') + a.text = str(list(self.footnotes.footnotes.keys()).index(id) + 1) + return sup, m.start(0), m.end(0) + else: + return None, None, None + + +class FootnotePostTreeprocessor(Treeprocessor): + """ Amend footnote div with duplicates. """ + + def __init__(self, footnotes): + self.footnotes = footnotes + + def add_duplicates(self, li, duplicates): + """ Adjust current li and add the duplicates: fnref2, fnref3, etc. """ + for link in li.iter('a'): + # Find the link that needs to be duplicated. + if link.attrib.get('class', '') == 'footnote-backref': + ref, rest = link.attrib['href'].split(self.footnotes.get_separator(), 1) + # Duplicate link the number of times we need to + # and point the to the appropriate references. + links = [] + for index in range(2, duplicates + 1): + sib_link = copy.deepcopy(link) + sib_link.attrib['href'] = '%s%d%s%s' % (ref, index, self.footnotes.get_separator(), rest) + links.append(sib_link) + self.offset += 1 + # Add all the new duplicate links. + el = list(li)[-1] + for link in links: + el.append(link) + break + + def get_num_duplicates(self, li): + """ Get the number of duplicate refs of the footnote. """ + fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1) + link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest) + return self.footnotes.found_refs.get(link_id, 0) + + def handle_duplicates(self, parent): + """ Find duplicate footnotes and format and add the duplicates. """ + for li in list(parent): + # Check number of duplicates footnotes and insert + # additional links if needed. + count = self.get_num_duplicates(li) + if count > 1: + self.add_duplicates(li, count) + + def run(self, root): + """ Crawl the footnote div and add missing duplicate footnotes. """ + self.offset = 0 + for div in root.iter('div'): + if div.attrib.get('class', '') == 'footnote': + # Footnotes should be under the first ordered list under + # the footnote div. So once we find it, quit. + for ol in div.iter('ol'): + self.handle_duplicates(ol) + break + + +class FootnoteTreeprocessor(Treeprocessor): + """ Build and append footnote div to end of document. """ + + def __init__(self, footnotes): + self.footnotes = footnotes + + def run(self, root): + footnotesDiv = self.footnotes.makeFootnotesDiv(root) + if footnotesDiv is not None: + result = self.footnotes.findFootnotesPlaceholder(root) + if result: + child, parent, isText = result + ind = list(parent).index(child) + if isText: + parent.remove(child) + parent.insert(ind, footnotesDiv) + else: + parent.insert(ind + 1, footnotesDiv) + child.tail = None + else: + root.append(footnotesDiv) + + +class FootnotePostprocessor(Postprocessor): + """ Replace placeholders with html entities. """ + def __init__(self, footnotes): + self.footnotes = footnotes + + def run(self, text): + text = text.replace( + FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT") + ) + return text.replace(NBSP_PLACEHOLDER, " ") + + +def makeExtension(**kwargs): # pragma: no cover + """ Return an instance of the FootnoteExtension """ + return FootnoteExtension(**kwargs) diff --git a/Libs/markdown/extensions/legacy_attrs.py b/Libs/markdown/extensions/legacy_attrs.py new file mode 100644 index 0000000..445aba1 --- /dev/null +++ b/Libs/markdown/extensions/legacy_attrs.py @@ -0,0 +1,67 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). + +Legacy Attributes Extension +=========================== + +An extension to Python Markdown which implements legacy attributes. + +Prior to Python-Markdown version 3.0, the Markdown class had an `enable_attributes` +keyword which was on by default and provided for attributes to be defined for elements +using the format `{@key=value}`. This extension is provided as a replacement for +backward compatibility. New documents should be authored using attr_lists. However, +numerious documents exist which have been using the old attribute format for many +years. This extension can be used to continue to render those documents correctly. +""" + +import re +from markdown.treeprocessors import Treeprocessor, isString +from markdown.extensions import Extension + + +ATTR_RE = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123} + + +class LegacyAttrs(Treeprocessor): + def run(self, doc): + """Find and set values of attributes ({@key=value}). """ + for el in doc.iter(): + alt = el.get('alt', None) + if alt is not None: + el.set('alt', self.handleAttributes(el, alt)) + if el.text and isString(el.text): + el.text = self.handleAttributes(el, el.text) + if el.tail and isString(el.tail): + el.tail = self.handleAttributes(el, el.tail) + + def handleAttributes(self, el, txt): + """ Set attributes and return text without definitions. """ + def attributeCallback(match): + el.set(match.group(1), match.group(2).replace('\n', ' ')) + return ATTR_RE.sub(attributeCallback, txt) + + +class LegacyAttrExtension(Extension): + def extendMarkdown(self, md): + md.treeprocessors.register(LegacyAttrs(md), 'legacyattrs', 15) + + +def makeExtension(**kwargs): # pragma: no cover + return LegacyAttrExtension(**kwargs) diff --git a/Libs/markdown/extensions/legacy_em.py b/Libs/markdown/extensions/legacy_em.py new file mode 100644 index 0000000..360988b --- /dev/null +++ b/Libs/markdown/extensions/legacy_em.py @@ -0,0 +1,49 @@ +''' +Legacy Em Extension for Python-Markdown +======================================= + +This extension provides legacy behavior for _connected_words_. + +Copyright 2015-2018 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +''' + +from . import Extension +from ..inlinepatterns import UnderscoreProcessor, EmStrongItem, EM_STRONG2_RE, STRONG_EM2_RE +import re + +# _emphasis_ +EMPHASIS_RE = r'(_)([^_]+)\1' + +# __strong__ +STRONG_RE = r'(_{2})(.+?)\1' + +# __strong_em___ +STRONG_EM_RE = r'(_)\1(?!\1)([^_]+?)\1(?!\1)(.+?)\1{3}' + + +class LegacyUnderscoreProcessor(UnderscoreProcessor): + """Emphasis processor for handling strong and em matches inside underscores.""" + + PATTERNS = [ + EmStrongItem(re.compile(EM_STRONG2_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'), + EmStrongItem(re.compile(STRONG_EM2_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'), + EmStrongItem(re.compile(STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'), + EmStrongItem(re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'), + EmStrongItem(re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em') + ] + + +class LegacyEmExtension(Extension): + """ Add legacy_em extension to Markdown class.""" + + def extendMarkdown(self, md): + """ Modify inline patterns. """ + md.inlinePatterns.register(LegacyUnderscoreProcessor(r'_'), 'em_strong2', 50) + + +def makeExtension(**kwargs): # pragma: no cover + """ Return an instance of the LegacyEmExtension """ + return LegacyEmExtension(**kwargs) diff --git a/Libs/markdown/extensions/md_in_html.py b/Libs/markdown/extensions/md_in_html.py new file mode 100644 index 0000000..ec7dcba --- /dev/null +++ b/Libs/markdown/extensions/md_in_html.py @@ -0,0 +1,364 @@ +""" +Python-Markdown Markdown in HTML Extension +=============================== + +An implementation of [PHP Markdown Extra](http://michelf.com/projects/php-markdown/extra/)'s +parsing of Markdown syntax in raw HTML. + +See <https://Python-Markdown.github.io/extensions/raw_html> +for documentation. + +Copyright The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..blockprocessors import BlockProcessor +from ..preprocessors import Preprocessor +from ..postprocessors import RawHtmlPostprocessor +from .. import util +from ..htmlparser import HTMLExtractor, blank_line_re +import xml.etree.ElementTree as etree + + +class HTMLExtractorExtra(HTMLExtractor): + """ + Override HTMLExtractor and create etree Elements for any elements which should have content parsed as Markdown. + """ + + def __init__(self, md, *args, **kwargs): + # All block-level tags. + self.block_level_tags = set(md.block_level_elements.copy()) + # Block-level tags in which the content only gets span level parsing + self.span_tags = set( + ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'summary', 'td', 'th'] + ) + # Block-level tags which never get their content parsed. + self.raw_tags = set(['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea']) + + super().__init__(md, *args, **kwargs) + + # Block-level tags in which the content gets parsed as blocks + self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags) + self.span_and_blocks_tags = self.block_tags | self.span_tags + + def reset(self): + """Reset this instance. Loses all unprocessed data.""" + self.mdstack = [] # When markdown=1, stack contains a list of tags + self.treebuilder = etree.TreeBuilder() + self.mdstate = [] # one of 'block', 'span', 'off', or None + super().reset() + + def close(self): + """Handle any buffered data.""" + super().close() + # Handle any unclosed tags. + if self.mdstack: + # Close the outermost parent. handle_endtag will close all unclosed children. + self.handle_endtag(self.mdstack[0]) + + def get_element(self): + """ Return element from treebuilder and reset treebuilder for later use. """ + element = self.treebuilder.close() + self.treebuilder = etree.TreeBuilder() + return element + + def get_state(self, tag, attrs): + """ Return state from tag and `markdown` attr. One of 'block', 'span', or 'off'. """ + md_attr = attrs.get('markdown', '0') + if md_attr == 'markdown': + # `<tag markdown>` is the same as `<tag markdown='1'>`. + md_attr = '1' + parent_state = self.mdstate[-1] if self.mdstate else None + if parent_state == 'off' or (parent_state == 'span' and md_attr != '0'): + # Only use the parent state if it is more restrictive than the markdown attribute. + md_attr = parent_state + if ((md_attr == '1' and tag in self.block_tags) or + (md_attr == 'block' and tag in self.span_and_blocks_tags)): + return 'block' + elif ((md_attr == '1' and tag in self.span_tags) or + (md_attr == 'span' and tag in self.span_and_blocks_tags)): + return 'span' + elif tag in self.block_level_tags: + return 'off' + else: # pragma: no cover + return None + + def handle_starttag(self, tag, attrs): + # Handle tags that should always be empty and do not specify a closing tag + if tag in self.empty_tags and (self.at_line_start() or self.intail): + attrs = {key: value if value is not None else key for key, value in attrs} + if "markdown" in attrs: + attrs.pop('markdown') + element = etree.Element(tag, attrs) + data = etree.tostring(element, encoding='unicode', method='html') + else: + data = self.get_starttag_text() + self.handle_empty_tag(data, True) + return + + if tag in self.block_level_tags and (self.at_line_start() or self.intail): + # Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`. + # Convert to `{'checked': 'checked'}`. + attrs = {key: value if value is not None else key for key, value in attrs} + state = self.get_state(tag, attrs) + if self.inraw or (state in [None, 'off'] and not self.mdstack): + # fall back to default behavior + attrs.pop('markdown', None) + super().handle_starttag(tag, attrs) + else: + if 'p' in self.mdstack and tag in self.block_level_tags: + # Close unclosed 'p' tag + self.handle_endtag('p') + self.mdstate.append(state) + self.mdstack.append(tag) + attrs['markdown'] = state + self.treebuilder.start(tag, attrs) + else: + # Span level tag + if self.inraw: + super().handle_starttag(tag, attrs) + else: + text = self.get_starttag_text() + if self.mdstate and self.mdstate[-1] == "off": + self.handle_data(self.md.htmlStash.store(text)) + else: + self.handle_data(text) + if tag in self.CDATA_CONTENT_ELEMENTS: + # This is presumably a standalone tag in a code span (see #1036). + self.clear_cdata_mode() + + def handle_endtag(self, tag): + if tag in self.block_level_tags: + if self.inraw: + super().handle_endtag(tag) + elif tag in self.mdstack: + # Close element and any unclosed children + while self.mdstack: + item = self.mdstack.pop() + self.mdstate.pop() + self.treebuilder.end(item) + if item == tag: + break + if not self.mdstack: + # Last item in stack is closed. Stash it + element = self.get_element() + # Get last entry to see if it ends in newlines + # If it is an element, assume there is no newlines + item = self.cleandoc[-1] if self.cleandoc else '' + # If we only have one newline before block element, add another + if not item.endswith('\n\n') and item.endswith('\n'): + self.cleandoc.append('\n') + self.cleandoc.append(self.md.htmlStash.store(element)) + self.cleandoc.append('\n\n') + self.state = [] + # Check if element has a tail + if not blank_line_re.match( + self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]): + # More content exists after endtag. + self.intail = True + else: + # Treat orphan closing tag as a span level tag. + text = self.get_endtag_text(tag) + if self.mdstate and self.mdstate[-1] == "off": + self.handle_data(self.md.htmlStash.store(text)) + else: + self.handle_data(text) + else: + # Span level tag + if self.inraw: + super().handle_endtag(tag) + else: + text = self.get_endtag_text(tag) + if self.mdstate and self.mdstate[-1] == "off": + self.handle_data(self.md.htmlStash.store(text)) + else: + self.handle_data(text) + + def handle_startendtag(self, tag, attrs): + if tag in self.empty_tags: + attrs = {key: value if value is not None else key for key, value in attrs} + if "markdown" in attrs: + attrs.pop('markdown') + element = etree.Element(tag, attrs) + data = etree.tostring(element, encoding='unicode', method='html') + else: + data = self.get_starttag_text() + else: + data = self.get_starttag_text() + self.handle_empty_tag(data, is_block=self.md.is_block_level(tag)) + + def handle_data(self, data): + if self.intail and '\n' in data: + self.intail = False + if self.inraw or not self.mdstack: + super().handle_data(data) + else: + self.treebuilder.data(data) + + def handle_empty_tag(self, data, is_block): + if self.inraw or not self.mdstack: + super().handle_empty_tag(data, is_block) + else: + if self.at_line_start() and is_block: + self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n') + else: + self.handle_data(self.md.htmlStash.store(data)) + + def parse_pi(self, i): + if self.at_line_start() or self.intail or self.mdstack: + # The same override exists in HTMLExtractor without the check + # for mdstack. Therefore, use HTMLExtractor's parent instead. + return super(HTMLExtractor, self).parse_pi(i) + # This is not the beginning of a raw block so treat as plain data + # and avoid consuming any tags which may follow (see #1066). + self.handle_data('<?') + return i + 2 + + def parse_html_declaration(self, i): + if self.at_line_start() or self.intail or self.mdstack: + # The same override exists in HTMLExtractor without the check + # for mdstack. Therefore, use HTMLExtractor's parent instead. + return super(HTMLExtractor, self).parse_html_declaration(i) + # This is not the beginning of a raw block so treat as plain data + # and avoid consuming any tags which may follow (see #1066). + self.handle_data('<!') + return i + 2 + + +class HtmlBlockPreprocessor(Preprocessor): + """Remove html blocks from the text and store them for later retrieval.""" + + def run(self, lines): + source = '\n'.join(lines) + parser = HTMLExtractorExtra(self.md) + parser.feed(source) + parser.close() + return ''.join(parser.cleandoc).split('\n') + + +class MarkdownInHtmlProcessor(BlockProcessor): + """Process Markdown Inside HTML Blocks which have been stored in the HtmlStash.""" + + def test(self, parent, block): + # ALways return True. `run` will return `False` it not a valid match. + return True + + def parse_element_content(self, element): + """ + Recursively parse the text content of an etree Element as Markdown. + + Any block level elements generated from the Markdown will be inserted as children of the element in place + of the text content. All `markdown` attributes are removed. For any elements in which Markdown parsing has + been disabled, the text content of it and its chidlren are wrapped in an `AtomicString`. + """ + + md_attr = element.attrib.pop('markdown', 'off') + + if md_attr == 'block': + # Parse content as block level + # The order in which the different parts are parsed (text, children, tails) is important here as the + # order of elements needs to be preserved. We can't be inserting items at a later point in the current + # iteration as we don't want to do raw processing on elements created from parsing Markdown text (for + # example). Therefore, the order of operations is children, tails, text. + + # Recursively parse existing children from raw HTML + for child in list(element): + self.parse_element_content(child) + + # Parse Markdown text in tail of children. Do this separate to avoid raw HTML parsing. + # Save the position of each item to be inserted later in reverse. + tails = [] + for pos, child in enumerate(element): + if child.tail: + block = child.tail.rstrip('\n') + child.tail = '' + # Use a dummy placeholder element. + dummy = etree.Element('div') + self.parser.parseBlocks(dummy, block.split('\n\n')) + children = list(dummy) + children.reverse() + tails.append((pos + 1, children)) + + # Insert the elements created from the tails in reverse. + tails.reverse() + for pos, tail in tails: + for item in tail: + element.insert(pos, item) + + # Parse Markdown text content. Do this last to avoid raw HTML parsing. + if element.text: + block = element.text.rstrip('\n') + element.text = '' + # Use a dummy placeholder element as the content needs to get inserted before existing children. + dummy = etree.Element('div') + self.parser.parseBlocks(dummy, block.split('\n\n')) + children = list(dummy) + children.reverse() + for child in children: + element.insert(0, child) + + elif md_attr == 'span': + # Span level parsing will be handled by inlineprocessors. + # Walk children here to remove any `markdown` attributes. + for child in list(element): + self.parse_element_content(child) + + else: + # Disable inline parsing for everything else + if element.text is None: + element.text = '' + element.text = util.AtomicString(element.text) + for child in list(element): + self.parse_element_content(child) + if child.tail: + child.tail = util.AtomicString(child.tail) + + def run(self, parent, blocks): + m = util.HTML_PLACEHOLDER_RE.match(blocks[0]) + if m: + index = int(m.group(1)) + element = self.parser.md.htmlStash.rawHtmlBlocks[index] + if isinstance(element, etree.Element): + # We have a matched element. Process it. + blocks.pop(0) + self.parse_element_content(element) + parent.append(element) + # Cleanup stash. Replace element with empty string to avoid confusing postprocessor. + self.parser.md.htmlStash.rawHtmlBlocks.pop(index) + self.parser.md.htmlStash.rawHtmlBlocks.insert(index, '') + # Confirm the match to the blockparser. + return True + # No match found. + return False + + +class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor): + def stash_to_string(self, text): + """ Override default to handle any etree elements still in the stash. """ + if isinstance(text, etree.Element): + return self.md.serializer(text) + else: + return str(text) + + +class MarkdownInHtmlExtension(Extension): + """Add Markdown parsing in HTML to Markdown class.""" + + def extendMarkdown(self, md): + """ Register extension instances. """ + + # Replace raw HTML preprocessor + md.preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20) + # Add blockprocessor which handles the placeholders for etree elements + md.parser.blockprocessors.register( + MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105 + ) + # Replace raw HTML postprocessor + md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30) + + +def makeExtension(**kwargs): # pragma: no cover + return MarkdownInHtmlExtension(**kwargs) diff --git a/Libs/markdown/extensions/meta.py b/Libs/markdown/extensions/meta.py new file mode 100644 index 0000000..10dee11 --- /dev/null +++ b/Libs/markdown/extensions/meta.py @@ -0,0 +1,79 @@ +""" +Meta Data Extension for Python-Markdown +======================================= + +This extension adds Meta Data handling to markdown. + +See <https://Python-Markdown.github.io/extensions/meta_data> +for documentation. + +Original code Copyright 2007-2008 [Waylan Limberg](http://achinghead.com). + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..preprocessors import Preprocessor +import re +import logging + +log = logging.getLogger('MARKDOWN') + +# Global Vars +META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)') +META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)') +BEGIN_RE = re.compile(r'^-{3}(\s.*)?') +END_RE = re.compile(r'^(-{3}|\.{3})(\s.*)?') + + +class MetaExtension (Extension): + """ Meta-Data extension for Python-Markdown. """ + + def extendMarkdown(self, md): + """ Add MetaPreprocessor to Markdown instance. """ + md.registerExtension(self) + self.md = md + md.preprocessors.register(MetaPreprocessor(md), 'meta', 27) + + def reset(self): + self.md.Meta = {} + + +class MetaPreprocessor(Preprocessor): + """ Get Meta-Data. """ + + def run(self, lines): + """ Parse Meta-Data and store in Markdown.Meta. """ + meta = {} + key = None + if lines and BEGIN_RE.match(lines[0]): + lines.pop(0) + while lines: + line = lines.pop(0) + m1 = META_RE.match(line) + if line.strip() == '' or END_RE.match(line): + break # blank line or end of YAML header - done + if m1: + key = m1.group('key').lower().strip() + value = m1.group('value').strip() + try: + meta[key].append(value) + except KeyError: + meta[key] = [value] + else: + m2 = META_MORE_RE.match(line) + if m2 and key: + # Add another line to existing key + meta[key].append(m2.group('value').strip()) + else: + lines.insert(0, line) + break # no meta data - done + self.md.Meta = meta + return lines + + +def makeExtension(**kwargs): # pragma: no cover + return MetaExtension(**kwargs) diff --git a/Libs/markdown/extensions/nl2br.py b/Libs/markdown/extensions/nl2br.py new file mode 100644 index 0000000..6c7491b --- /dev/null +++ b/Libs/markdown/extensions/nl2br.py @@ -0,0 +1,33 @@ +""" +NL2BR Extension +=============== + +A Python-Markdown extension to treat newlines as hard breaks; like +GitHub-flavored Markdown does. + +See <https://Python-Markdown.github.io/extensions/nl2br> +for documentation. + +Oringinal code Copyright 2011 [Brian Neal](https://deathofagremmie.com/) + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..inlinepatterns import SubstituteTagInlineProcessor + +BR_RE = r'\n' + + +class Nl2BrExtension(Extension): + + def extendMarkdown(self, md): + br_tag = SubstituteTagInlineProcessor(BR_RE, 'br') + md.inlinePatterns.register(br_tag, 'nl', 5) + + +def makeExtension(**kwargs): # pragma: no cover + return Nl2BrExtension(**kwargs) diff --git a/Libs/markdown/extensions/sane_lists.py b/Libs/markdown/extensions/sane_lists.py new file mode 100644 index 0000000..e27eb18 --- /dev/null +++ b/Libs/markdown/extensions/sane_lists.py @@ -0,0 +1,54 @@ +""" +Sane List Extension for Python-Markdown +======================================= + +Modify the behavior of Lists in Python-Markdown to act in a sane manor. + +See <https://Python-Markdown.github.io/extensions/sane_lists> +for documentation. + +Original code Copyright 2011 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2011-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..blockprocessors import OListProcessor, UListProcessor +import re + + +class SaneOListProcessor(OListProcessor): + + SIBLING_TAGS = ['ol'] + LAZY_OL = False + + def __init__(self, parser): + super().__init__(parser) + self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.))[ ]+(.*)' % + (self.tab_length - 1)) + + +class SaneUListProcessor(UListProcessor): + + SIBLING_TAGS = ['ul'] + + def __init__(self, parser): + super().__init__(parser) + self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*+-]))[ ]+(.*)' % + (self.tab_length - 1)) + + +class SaneListExtension(Extension): + """ Add sane lists to Markdown. """ + + def extendMarkdown(self, md): + """ Override existing Processors. """ + md.parser.blockprocessors.register(SaneOListProcessor(md.parser), 'olist', 40) + md.parser.blockprocessors.register(SaneUListProcessor(md.parser), 'ulist', 30) + + +def makeExtension(**kwargs): # pragma: no cover + return SaneListExtension(**kwargs) diff --git a/Libs/markdown/extensions/smarty.py b/Libs/markdown/extensions/smarty.py new file mode 100644 index 0000000..894805f --- /dev/null +++ b/Libs/markdown/extensions/smarty.py @@ -0,0 +1,263 @@ +''' +Smarty extension for Python-Markdown +==================================== + +Adds conversion of ASCII dashes, quotes and ellipses to their HTML +entity equivalents. + +See <https://Python-Markdown.github.io/extensions/smarty> +for documentation. + +Author: 2013, Dmitry Shachnev <mitya57@gmail.com> + +All changes Copyright 2013-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +SmartyPants license: + + Copyright (c) 2003 John Gruber <https://daringfireball.net/> + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name "SmartyPants" nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + + +smartypants.py license: + + smartypants.py is a derivative work of SmartyPants. + Copyright (c) 2004, 2007 Chad Miller <http://web.chad.org/> + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + This software is provided by the copyright holders and contributors "as + is" and any express or implied warranties, including, but not limited + to, the implied warranties of merchantability and fitness for a + particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + +''' + + +from . import Extension +from ..inlinepatterns import HtmlInlineProcessor, HTML_RE +from ..treeprocessors import InlineProcessor +from ..util import Registry, deprecated + + +# Constants for quote education. +punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" +endOfWordClass = r"[\s.,;:!?)]" +closeClass = r"[^\ \t\r\n\[\{\(\-\u0002\u0003]" + +openingQuotesBase = ( + r'(\s' # a whitespace char + r'| ' # or a non-breaking space entity + r'|--' # or dashes + r'|–|—' # or unicode + r'|&[mn]dash;' # or named dash entities + r'|–|—' # or decimal entities + r')' +) + +substitutions = { + 'mdash': '—', + 'ndash': '–', + 'ellipsis': '…', + 'left-angle-quote': '«', + 'right-angle-quote': '»', + 'left-single-quote': '‘', + 'right-single-quote': '’', + 'left-double-quote': '“', + 'right-double-quote': '”', +} + + +# Special case if the very first character is a quote +# followed by punctuation at a non-word-break. Close the quotes by brute force: +singleQuoteStartRe = r"^'(?=%s\B)" % punctClass +doubleQuoteStartRe = r'^"(?=%s\B)' % punctClass + +# Special case for double sets of quotes, e.g.: +# <p>He said, "'Quoted' words in a larger quote."</p> +doubleQuoteSetsRe = r""""'(?=\w)""" +singleQuoteSetsRe = r"""'"(?=\w)""" + +# Special case for decade abbreviations (the '80s): +decadeAbbrRe = r"(?<!\w)'(?=\d{2}s)" + +# Get most opening double quotes: +openingDoubleQuotesRegex = r'%s"(?=\w)' % openingQuotesBase + +# Double closing quotes: +closingDoubleQuotesRegex = r'"(?=\s)' +closingDoubleQuotesRegex2 = '(?<=%s)"' % closeClass + +# Get most opening single quotes: +openingSingleQuotesRegex = r"%s'(?=\w)" % openingQuotesBase + +# Single closing quotes: +closingSingleQuotesRegex = r"(?<=%s)'(?!\s|s\b|\d)" % closeClass +closingSingleQuotesRegex2 = r"(?<=%s)'(\s|s\b)" % closeClass + +# All remaining quotes should be opening ones +remainingSingleQuotesRegex = r"'" +remainingDoubleQuotesRegex = r'"' + +HTML_STRICT_RE = HTML_RE + r'(?!\>)' + + +class SubstituteTextPattern(HtmlInlineProcessor): + def __init__(self, pattern, replace, md): + """ Replaces matches with some text. """ + HtmlInlineProcessor.__init__(self, pattern) + self.replace = replace + self.md = md + + @property + @deprecated("Use 'md' instead.") + def markdown(self): + # TODO: remove this later + return self.md + + def handleMatch(self, m, data): + result = '' + for part in self.replace: + if isinstance(part, int): + result += m.group(part) + else: + result += self.md.htmlStash.store(part) + return result, m.start(0), m.end(0) + + +class SmartyExtension(Extension): + def __init__(self, **kwargs): + self.config = { + 'smart_quotes': [True, 'Educate quotes'], + 'smart_angled_quotes': [False, 'Educate angled quotes'], + 'smart_dashes': [True, 'Educate dashes'], + 'smart_ellipses': [True, 'Educate ellipses'], + 'substitutions': [{}, 'Overwrite default substitutions'], + } + super().__init__(**kwargs) + self.substitutions = dict(substitutions) + self.substitutions.update(self.getConfig('substitutions', default={})) + + def _addPatterns(self, md, patterns, serie, priority): + for ind, pattern in enumerate(patterns): + pattern += (md,) + pattern = SubstituteTextPattern(*pattern) + name = 'smarty-%s-%d' % (serie, ind) + self.inlinePatterns.register(pattern, name, priority-ind) + + def educateDashes(self, md): + emDashesPattern = SubstituteTextPattern( + r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md + ) + enDashesPattern = SubstituteTextPattern( + r'(?<!-)--(?!-)', (self.substitutions['ndash'],), md + ) + self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50) + self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45) + + def educateEllipses(self, md): + ellipsesPattern = SubstituteTextPattern( + r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md + ) + self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10) + + def educateAngledQuotes(self, md): + leftAngledQuotePattern = SubstituteTextPattern( + r'\<\<', (self.substitutions['left-angle-quote'],), md + ) + rightAngledQuotePattern = SubstituteTextPattern( + r'\>\>', (self.substitutions['right-angle-quote'],), md + ) + self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40) + self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35) + + def educateQuotes(self, md): + lsquo = self.substitutions['left-single-quote'] + rsquo = self.substitutions['right-single-quote'] + ldquo = self.substitutions['left-double-quote'] + rdquo = self.substitutions['right-double-quote'] + patterns = ( + (singleQuoteStartRe, (rsquo,)), + (doubleQuoteStartRe, (rdquo,)), + (doubleQuoteSetsRe, (ldquo + lsquo,)), + (singleQuoteSetsRe, (lsquo + ldquo,)), + (decadeAbbrRe, (rsquo,)), + (openingSingleQuotesRegex, (1, lsquo)), + (closingSingleQuotesRegex, (rsquo,)), + (closingSingleQuotesRegex2, (rsquo, 1)), + (remainingSingleQuotesRegex, (lsquo,)), + (openingDoubleQuotesRegex, (1, ldquo)), + (closingDoubleQuotesRegex, (rdquo,)), + (closingDoubleQuotesRegex2, (rdquo,)), + (remainingDoubleQuotesRegex, (ldquo,)) + ) + self._addPatterns(md, patterns, 'quotes', 30) + + def extendMarkdown(self, md): + configs = self.getConfigs() + self.inlinePatterns = Registry() + if configs['smart_ellipses']: + self.educateEllipses(md) + if configs['smart_quotes']: + self.educateQuotes(md) + if configs['smart_angled_quotes']: + self.educateAngledQuotes(md) + # Override HTML_RE from inlinepatterns.py so that it does not + # process tags with duplicate closing quotes. + md.inlinePatterns.register(HtmlInlineProcessor(HTML_STRICT_RE, md), 'html', 90) + if configs['smart_dashes']: + self.educateDashes(md) + inlineProcessor = InlineProcessor(md) + inlineProcessor.inlinePatterns = self.inlinePatterns + md.treeprocessors.register(inlineProcessor, 'smarty', 2) + md.ESCAPED_CHARS.extend(['"', "'"]) + + +def makeExtension(**kwargs): # pragma: no cover + return SmartyExtension(**kwargs) diff --git a/Libs/markdown/extensions/tables.py b/Libs/markdown/extensions/tables.py new file mode 100644 index 0000000..0a9d084 --- /dev/null +++ b/Libs/markdown/extensions/tables.py @@ -0,0 +1,223 @@ +""" +Tables Extension for Python-Markdown +==================================== + +Added parsing of tables to Python-Markdown. + +See <https://Python-Markdown.github.io/extensions/tables> +for documentation. + +Original code Copyright 2009 [Waylan Limberg](http://achinghead.com) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..blockprocessors import BlockProcessor +import xml.etree.ElementTree as etree +import re +PIPE_NONE = 0 +PIPE_LEFT = 1 +PIPE_RIGHT = 2 + + +class TableProcessor(BlockProcessor): + """ Process Tables. """ + + RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))') + RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$') + + def __init__(self, parser): + self.border = False + self.separator = '' + super().__init__(parser) + + def test(self, parent, block): + """ + Ensure first two rows (column header and separator row) are valid table rows. + + Keep border check and separator row do avoid repeating the work. + """ + is_table = False + rows = [row.strip(' ') for row in block.split('\n')] + if len(rows) > 1: + header0 = rows[0] + self.border = PIPE_NONE + if header0.startswith('|'): + self.border |= PIPE_LEFT + if self.RE_END_BORDER.search(header0) is not None: + self.border |= PIPE_RIGHT + row = self._split_row(header0) + row0_len = len(row) + is_table = row0_len > 1 + + # Each row in a single column table needs at least one pipe. + if not is_table and row0_len == 1 and self.border: + for index in range(1, len(rows)): + is_table = rows[index].startswith('|') + if not is_table: + is_table = self.RE_END_BORDER.search(rows[index]) is not None + if not is_table: + break + + if is_table: + row = self._split_row(rows[1]) + is_table = (len(row) == row0_len) and set(''.join(row)) <= set('|:- ') + if is_table: + self.separator = row + + return is_table + + def run(self, parent, blocks): + """ Parse a table block and build table. """ + block = blocks.pop(0).split('\n') + header = block[0].strip(' ') + rows = [] if len(block) < 3 else block[2:] + + # Get alignment of columns + align = [] + for c in self.separator: + c = c.strip(' ') + if c.startswith(':') and c.endswith(':'): + align.append('center') + elif c.startswith(':'): + align.append('left') + elif c.endswith(':'): + align.append('right') + else: + align.append(None) + + # Build table + table = etree.SubElement(parent, 'table') + thead = etree.SubElement(table, 'thead') + self._build_row(header, thead, align) + tbody = etree.SubElement(table, 'tbody') + if len(rows) == 0: + # Handle empty table + self._build_empty_row(tbody, align) + else: + for row in rows: + self._build_row(row.strip(' '), tbody, align) + + def _build_empty_row(self, parent, align): + """Build an empty row.""" + tr = etree.SubElement(parent, 'tr') + count = len(align) + while count: + etree.SubElement(tr, 'td') + count -= 1 + + def _build_row(self, row, parent, align): + """ Given a row of text, build table cells. """ + tr = etree.SubElement(parent, 'tr') + tag = 'td' + if parent.tag == 'thead': + tag = 'th' + cells = self._split_row(row) + # We use align here rather than cells to ensure every row + # contains the same number of columns. + for i, a in enumerate(align): + c = etree.SubElement(tr, tag) + try: + c.text = cells[i].strip(' ') + except IndexError: # pragma: no cover + c.text = "" + if a: + c.set('align', a) + + def _split_row(self, row): + """ split a row of text into list of cells. """ + if self.border: + if row.startswith('|'): + row = row[1:] + row = self.RE_END_BORDER.sub('', row) + return self._split(row) + + def _split(self, row): + """ split a row of text with some code into a list of cells. """ + elements = [] + pipes = [] + tics = [] + tic_points = [] + tic_region = [] + good_pipes = [] + + # Parse row + # Throw out \\, and \| + for m in self.RE_CODE_PIPES.finditer(row): + # Store ` data (len, start_pos, end_pos) + if m.group(2): + # \`+ + # Store length of each tic group: subtract \ + tics.append(len(m.group(2)) - 1) + # Store start of group, end of group, and escape length + tic_points.append((m.start(2), m.end(2) - 1, 1)) + elif m.group(3): + # `+ + # Store length of each tic group + tics.append(len(m.group(3))) + # Store start of group, end of group, and escape length + tic_points.append((m.start(3), m.end(3) - 1, 0)) + # Store pipe location + elif m.group(5): + pipes.append(m.start(5)) + + # Pair up tics according to size if possible + # Subtract the escape length *only* from the opening. + # Walk through tic list and see if tic has a close. + # Store the tic region (start of region, end of region). + pos = 0 + tic_len = len(tics) + while pos < tic_len: + try: + tic_size = tics[pos] - tic_points[pos][2] + if tic_size == 0: + raise ValueError + index = tics[pos + 1:].index(tic_size) + 1 + tic_region.append((tic_points[pos][0], tic_points[pos + index][1])) + pos += index + 1 + except ValueError: + pos += 1 + + # Resolve pipes. Check if they are within a tic pair region. + # Walk through pipes comparing them to each region. + # - If pipe position is less that a region, it isn't in a region + # - If it is within a region, we don't want it, so throw it out + # - If we didn't throw it out, it must be a table pipe + for pipe in pipes: + throw_out = False + for region in tic_region: + if pipe < region[0]: + # Pipe is not in a region + break + elif region[0] <= pipe <= region[1]: + # Pipe is within a code region. Throw it out. + throw_out = True + break + if not throw_out: + good_pipes.append(pipe) + + # Split row according to table delimiters. + pos = 0 + for pipe in good_pipes: + elements.append(row[pos:pipe]) + pos = pipe + 1 + elements.append(row[pos:]) + return elements + + +class TableExtension(Extension): + """ Add tables to Markdown. """ + + def extendMarkdown(self, md): + """ Add an instance of TableProcessor to BlockParser. """ + if '|' not in md.ESCAPED_CHARS: + md.ESCAPED_CHARS.append('|') + md.parser.blockprocessors.register(TableProcessor(md.parser), 'table', 75) + + +def makeExtension(**kwargs): # pragma: no cover + return TableExtension(**kwargs) diff --git a/Libs/markdown/extensions/toc.py b/Libs/markdown/extensions/toc.py new file mode 100644 index 0000000..57d2e3b --- /dev/null +++ b/Libs/markdown/extensions/toc.py @@ -0,0 +1,380 @@ +""" +Table of Contents Extension for Python-Markdown +=============================================== + +See <https://Python-Markdown.github.io/extensions/toc> +for documentation. + +Oringinal code Copyright 2008 [Jack Miller](https://codezen.org/) + +All changes Copyright 2008-2014 The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +""" + +from . import Extension +from ..treeprocessors import Treeprocessor +from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString +from ..postprocessors import UnescapePostprocessor +import re +import html +import unicodedata +import xml.etree.ElementTree as etree + + +def slugify(value, separator, unicode=False): + """ Slugify a string, to make it URL friendly. """ + if not unicode: + # Replace Extended Latin characters with ASCII, i.e. žlutý → zluty + value = unicodedata.normalize('NFKD', value) + value = value.encode('ascii', 'ignore').decode('ascii') + value = re.sub(r'[^\w\s-]', '', value).strip().lower() + return re.sub(r'[{}\s]+'.format(separator), separator, value) + + +def slugify_unicode(value, separator): + """ Slugify a string, to make it URL friendly while preserving Unicode characters. """ + return slugify(value, separator, unicode=True) + + +IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') + + +def unique(id, ids): + """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ + while id in ids or not id: + m = IDCOUNT_RE.match(id) + if m: + id = '%s_%d' % (m.group(1), int(m.group(2))+1) + else: + id = '%s_%d' % (id, 1) + ids.add(id) + return id + + +def get_name(el): + """Get title name.""" + + text = [] + for c in el.itertext(): + if isinstance(c, AtomicString): + text.append(html.unescape(c)) + else: + text.append(c) + return ''.join(text).strip() + + +def stashedHTML2text(text, md, strip_entities=True): + """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ + def _html_sub(m): + """ Substitute raw html with plain text. """ + try: + raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] + except (IndexError, TypeError): # pragma: no cover + return m.group(0) + # Strip out tags and/or entities - leaving text + res = re.sub(r'(<[^>]+>)', '', raw) + if strip_entities: + res = re.sub(r'(&[\#a-zA-Z0-9]+;)', '', res) + return res + + return HTML_PLACEHOLDER_RE.sub(_html_sub, text) + + +def unescape(text): + """ Unescape escaped text. """ + c = UnescapePostprocessor() + return c.run(text) + + +def nest_toc_tokens(toc_list): + """Given an unsorted list with errors and skips, return a nested one. + [{'level': 1}, {'level': 2}] + => + [{'level': 1, 'children': [{'level': 2, 'children': []}]}] + + A wrong list is also converted: + [{'level': 2}, {'level': 1}] + => + [{'level': 2, 'children': []}, {'level': 1, 'children': []}] + """ + + ordered_list = [] + if len(toc_list): + # Initialize everything by processing the first entry + last = toc_list.pop(0) + last['children'] = [] + levels = [last['level']] + ordered_list.append(last) + parents = [] + + # Walk the rest nesting the entries properly + while toc_list: + t = toc_list.pop(0) + current_level = t['level'] + t['children'] = [] + + # Reduce depth if current level < last item's level + if current_level < levels[-1]: + # Pop last level since we know we are less than it + levels.pop() + + # Pop parents and levels we are less than or equal to + to_pop = 0 + for p in reversed(parents): + if current_level <= p['level']: + to_pop += 1 + else: # pragma: no cover + break + if to_pop: + levels = levels[:-to_pop] + parents = parents[:-to_pop] + + # Note current level as last + levels.append(current_level) + + # Level is the same, so append to + # the current parent (if available) + if current_level == levels[-1]: + (parents[-1]['children'] if parents + else ordered_list).append(t) + + # Current level is > last item's level, + # So make last item a parent and append current as child + else: + last['children'].append(t) + parents.append(last) + levels.append(current_level) + last = t + + return ordered_list + + +class TocTreeprocessor(Treeprocessor): + def __init__(self, md, config): + super().__init__(md) + + self.marker = config["marker"] + self.title = config["title"] + self.base_level = int(config["baselevel"]) - 1 + self.slugify = config["slugify"] + self.sep = config["separator"] + self.use_anchors = parseBoolValue(config["anchorlink"]) + self.anchorlink_class = config["anchorlink_class"] + self.use_permalinks = parseBoolValue(config["permalink"], False) + if self.use_permalinks is None: + self.use_permalinks = config["permalink"] + self.permalink_class = config["permalink_class"] + self.permalink_title = config["permalink_title"] + self.header_rgx = re.compile("[Hh][123456]") + if isinstance(config["toc_depth"], str) and '-' in config["toc_depth"]: + self.toc_top, self.toc_bottom = [int(x) for x in config["toc_depth"].split('-')] + else: + self.toc_top = 1 + self.toc_bottom = int(config["toc_depth"]) + + def iterparent(self, node): + ''' Iterator wrapper to get allowed parent and child all at once. ''' + + # We do not allow the marker inside a header as that + # would causes an enless loop of placing a new TOC + # inside previously generated TOC. + for child in node: + if not self.header_rgx.match(child.tag) and child.tag not in ['pre', 'code']: + yield node, child + yield from self.iterparent(child) + + def replace_marker(self, root, elem): + ''' Replace marker with elem. ''' + for (p, c) in self.iterparent(root): + text = ''.join(c.itertext()).strip() + if not text: + continue + + # To keep the output from screwing up the + # validation by putting a <div> inside of a <p> + # we actually replace the <p> in its entirety. + + # The <p> element may contain more than a single text content + # (nl2br can introduce a <br>). In this situation, c.text returns + # the very first content, ignore children contents or tail content. + # len(c) == 0 is here to ensure there is only text in the <p>. + if c.text and c.text.strip() == self.marker and len(c) == 0: + for i in range(len(p)): + if p[i] == c: + p[i] = elem + break + + def set_level(self, elem): + ''' Adjust header level according to base level. ''' + level = int(elem.tag[-1]) + self.base_level + if level > 6: + level = 6 + elem.tag = 'h%d' % level + + def add_anchor(self, c, elem_id): # @ReservedAssignment + anchor = etree.Element("a") + anchor.text = c.text + anchor.attrib["href"] = "#" + elem_id + anchor.attrib["class"] = self.anchorlink_class + c.text = "" + for elem in c: + anchor.append(elem) + while len(c): + c.remove(c[0]) + c.append(anchor) + + def add_permalink(self, c, elem_id): + permalink = etree.Element("a") + permalink.text = ("%spara;" % AMP_SUBSTITUTE + if self.use_permalinks is True + else self.use_permalinks) + permalink.attrib["href"] = "#" + elem_id + permalink.attrib["class"] = self.permalink_class + if self.permalink_title: + permalink.attrib["title"] = self.permalink_title + c.append(permalink) + + def build_toc_div(self, toc_list): + """ Return a string div given a toc list. """ + div = etree.Element("div") + div.attrib["class"] = "toc" + + # Add title to the div + if self.title: + header = etree.SubElement(div, "span") + header.attrib["class"] = "toctitle" + header.text = self.title + + def build_etree_ul(toc_list, parent): + ul = etree.SubElement(parent, "ul") + for item in toc_list: + # List item link, to be inserted into the toc div + li = etree.SubElement(ul, "li") + link = etree.SubElement(li, "a") + link.text = item.get('name', '') + link.attrib["href"] = '#' + item.get('id', '') + if item['children']: + build_etree_ul(item['children'], li) + return ul + + build_etree_ul(toc_list, div) + + if 'prettify' in self.md.treeprocessors: + self.md.treeprocessors['prettify'].run(div) + + return div + + def run(self, doc): + # Get a list of id attributes + used_ids = set() + for el in doc.iter(): + if "id" in el.attrib: + used_ids.add(el.attrib["id"]) + + toc_tokens = [] + for el in doc.iter(): + if isinstance(el.tag, str) and self.header_rgx.match(el.tag): + self.set_level(el) + text = get_name(el) + + # Do not override pre-existing ids + if "id" not in el.attrib: + innertext = unescape(stashedHTML2text(text, self.md)) + el.attrib["id"] = unique(self.slugify(innertext, self.sep), used_ids) + + if int(el.tag[-1]) >= self.toc_top and int(el.tag[-1]) <= self.toc_bottom: + toc_tokens.append({ + 'level': int(el.tag[-1]), + 'id': el.attrib["id"], + 'name': unescape(stashedHTML2text( + code_escape(el.attrib.get('data-toc-label', text)), + self.md, strip_entities=False + )) + }) + + # Remove the data-toc-label attribute as it is no longer needed + if 'data-toc-label' in el.attrib: + del el.attrib['data-toc-label'] + + if self.use_anchors: + self.add_anchor(el, el.attrib["id"]) + if self.use_permalinks not in [False, None]: + self.add_permalink(el, el.attrib["id"]) + + toc_tokens = nest_toc_tokens(toc_tokens) + div = self.build_toc_div(toc_tokens) + if self.marker: + self.replace_marker(doc, div) + + # serialize and attach to markdown instance. + toc = self.md.serializer(div) + for pp in self.md.postprocessors: + toc = pp.run(toc) + self.md.toc_tokens = toc_tokens + self.md.toc = toc + + +class TocExtension(Extension): + + TreeProcessorClass = TocTreeprocessor + + def __init__(self, **kwargs): + self.config = { + "marker": ['[TOC]', + 'Text to find and replace with Table of Contents - ' + 'Set to an empty string to disable. Defaults to "[TOC]"'], + "title": ["", + "Title to insert into TOC <div> - " + "Defaults to an empty string"], + "anchorlink": [False, + "True if header should be a self link - " + "Defaults to False"], + "anchorlink_class": ['toclink', + 'CSS class(es) used for the link. ' + 'Defaults to "toclink"'], + "permalink": [0, + "True or link text if a Sphinx-style permalink should " + "be added - Defaults to False"], + "permalink_class": ['headerlink', + 'CSS class(es) used for the link. ' + 'Defaults to "headerlink"'], + "permalink_title": ["Permanent link", + "Title attribute of the permalink - " + "Defaults to 'Permanent link'"], + "baselevel": ['1', 'Base level for headers.'], + "slugify": [slugify, + "Function to generate anchors based on header text - " + "Defaults to the headerid ext's slugify function."], + 'separator': ['-', 'Word separator. Defaults to "-".'], + "toc_depth": [6, + 'Define the range of section levels to include in' + 'the Table of Contents. A single integer (b) defines' + 'the bottom section level (<h1>..<hb>) only.' + 'A string consisting of two digits separated by a hyphen' + 'in between ("2-5"), define the top (t) and the' + 'bottom (b) (<ht>..<hb>). Defaults to `6` (bottom).'], + } + + super().__init__(**kwargs) + + def extendMarkdown(self, md): + md.registerExtension(self) + self.md = md + self.reset() + tocext = self.TreeProcessorClass(md, self.getConfigs()) + # Headerid ext is set to '>prettify'. With this set to '_end', + # it should always come after headerid ext (and honor ids assigned + # by the header id extension) if both are used. Same goes for + # attr_list extension. This must come last because we don't want + # to redefine ids after toc is created. But we do want toc prettified. + md.treeprocessors.register(tocext, 'toc', 5) + + def reset(self): + self.md.toc = '' + self.md.toc_tokens = [] + + +def makeExtension(**kwargs): # pragma: no cover + return TocExtension(**kwargs) diff --git a/Libs/markdown/extensions/wikilinks.py b/Libs/markdown/extensions/wikilinks.py new file mode 100644 index 0000000..cddee7a --- /dev/null +++ b/Libs/markdown/extensions/wikilinks.py @@ -0,0 +1,87 @@ +''' +WikiLinks Extension for Python-Markdown +====================================== + +Converts [[WikiLinks]] to relative links. + +See <https://Python-Markdown.github.io/extensions/wikilinks> +for documentation. + +Original code Copyright [Waylan Limberg](http://achinghead.com/). + +All changes Copyright The Python Markdown Project + +License: [BSD](https://opensource.org/licenses/bsd-license.php) + +''' + +from . import Extension +from ..inlinepatterns import InlineProcessor +import xml.etree.ElementTree as etree +import re + + +def build_url(label, base, end): + """ Build a url from the label, a base, and an end. """ + clean_label = re.sub(r'([ ]+_)|(_[ ]+)|([ ]+)', '_', label) + return '{}{}{}'.format(base, clean_label, end) + + +class WikiLinkExtension(Extension): + + def __init__(self, **kwargs): + self.config = { + 'base_url': ['/', 'String to append to beginning or URL.'], + 'end_url': ['/', 'String to append to end of URL.'], + 'html_class': ['wikilink', 'CSS hook. Leave blank for none.'], + 'build_url': [build_url, 'Callable formats URL from label.'], + } + + super().__init__(**kwargs) + + def extendMarkdown(self, md): + self.md = md + + # append to end of inline patterns + WIKILINK_RE = r'\[\[([\w0-9_ -]+)\]\]' + wikilinkPattern = WikiLinksInlineProcessor(WIKILINK_RE, self.getConfigs()) + wikilinkPattern.md = md + md.inlinePatterns.register(wikilinkPattern, 'wikilink', 75) + + +class WikiLinksInlineProcessor(InlineProcessor): + def __init__(self, pattern, config): + super().__init__(pattern) + self.config = config + + def handleMatch(self, m, data): + if m.group(1).strip(): + base_url, end_url, html_class = self._getMeta() + label = m.group(1).strip() + url = self.config['build_url'](label, base_url, end_url) + a = etree.Element('a') + a.text = label + a.set('href', url) + if html_class: + a.set('class', html_class) + else: + a = '' + return a, m.start(0), m.end(0) + + def _getMeta(self): + """ Return meta data or config data. """ + base_url = self.config['base_url'] + end_url = self.config['end_url'] + html_class = self.config['html_class'] + if hasattr(self.md, 'Meta'): + if 'wiki_base_url' in self.md.Meta: + base_url = self.md.Meta['wiki_base_url'][0] + if 'wiki_end_url' in self.md.Meta: + end_url = self.md.Meta['wiki_end_url'][0] + if 'wiki_html_class' in self.md.Meta: + html_class = self.md.Meta['wiki_html_class'][0] + return base_url, end_url, html_class + + +def makeExtension(**kwargs): # pragma: no cover + return WikiLinkExtension(**kwargs) diff --git a/Libs/markdown/htmlparser.py b/Libs/markdown/htmlparser.py new file mode 100644 index 0000000..7ca858e --- /dev/null +++ b/Libs/markdown/htmlparser.py @@ -0,0 +1,323 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2020 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import re +import importlib +import sys + + +# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. +# Users can still do `from html import parser` and get the default behavior. +spec = importlib.util.find_spec('html.parser') +htmlparser = importlib.util.module_from_spec(spec) +spec.loader.exec_module(htmlparser) +sys.modules['htmlparser'] = htmlparser + +# Monkeypatch HTMLParser to only accept `?>` to close Processing Instructions. +htmlparser.piclose = re.compile(r'\?>') +# Monkeypatch HTMLParser to only recognize entity references with a closing semicolon. +htmlparser.entityref = re.compile(r'&([a-zA-Z][-.a-zA-Z0-9]*);') +# Monkeypatch HTMLParser to no longer support partial entities. We are always feeding a complete block, +# so the 'incomplete' functionality is unnecessary. As the entityref regex is run right before incomplete, +# and the two regex are the same, then incomplete will simply never match and we avoid the logic within. +htmlparser.incomplete = htmlparser.entityref +# Monkeypatch HTMLParser to not accept a backtick in a tag name, attribute name, or bare value. +htmlparser.locatestarttagend_tolerant = re.compile(r""" + <[a-zA-Z][^`\t\n\r\f />\x00]* # tag name <= added backtick here + (?:[\s/]* # optional whitespace before attribute name + (?:(?<=['"\s/])[^`\s/>][^\s/=>]* # attribute name <= added backtick here + (?:\s*=+\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^`>\s]* # bare value <= added backtick here + ) + (?:\s*,)* # possibly followed by a comma + )?(?:\s|/(?!>))* + )* + )? + \s* # trailing whitespace +""", re.VERBOSE) + +# Match a blank line at the start of a block of text (two newlines). +# The newlines may be preceded by additional whitespace. +blank_line_re = re.compile(r'^([ ]*\n){2}') + + +class HTMLExtractor(htmlparser.HTMLParser): + """ + Extract raw HTML from text. + + The raw HTML is stored in the `htmlStash` of the Markdown instance passed + to `md` and the remaining text is stored in `cleandoc` as a list of strings. + """ + + def __init__(self, md, *args, **kwargs): + if 'convert_charrefs' not in kwargs: + kwargs['convert_charrefs'] = False + + # Block tags that should contain no content (self closing) + self.empty_tags = set(['hr']) + + # This calls self.reset + super().__init__(*args, **kwargs) + self.md = md + + def reset(self): + """Reset this instance. Loses all unprocessed data.""" + self.inraw = False + self.intail = False + self.stack = [] # When inraw==True, stack contains a list of tags + self._cache = [] + self.cleandoc = [] + super().reset() + + def close(self): + """Handle any buffered data.""" + super().close() + if len(self.rawdata): + # Temp fix for https://bugs.python.org/issue41989 + # TODO: remove this when the bug is fixed in all supported Python versions. + if self.convert_charrefs and not self.cdata_elem: # pragma: no cover + self.handle_data(htmlparser.unescape(self.rawdata)) + else: + self.handle_data(self.rawdata) + # Handle any unclosed tags. + if len(self._cache): + self.cleandoc.append(self.md.htmlStash.store(''.join(self._cache))) + self._cache = [] + + @property + def line_offset(self): + """Returns char index in self.rawdata for the start of the current line. """ + if self.lineno > 1 and '\n' in self.rawdata: + m = re.match(r'([^\n]*\n){{{}}}'.format(self.lineno-1), self.rawdata) + if m: + return m.end() + else: # pragma: no cover + # Value of self.lineno must exceed total number of lines. + # Find index of beginning of last line. + return self.rawdata.rfind('\n') + return 0 + + def at_line_start(self): + """ + Returns True if current position is at start of line. + + Allows for up to three blank spaces at start of line. + """ + if self.offset == 0: + return True + if self.offset > 3: + return False + # Confirm up to first 3 chars are whitespace + return self.rawdata[self.line_offset:self.line_offset + self.offset].strip() == '' + + def get_endtag_text(self, tag): + """ + Returns the text of the end tag. + + If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`. + """ + # Attempt to extract actual tag from raw source text + start = self.line_offset + self.offset + m = htmlparser.endendtag.search(self.rawdata, start) + if m: + return self.rawdata[start:m.end()] + else: # pragma: no cover + # Failed to extract from raw data. Assume well formed and lowercase. + return '</{}>'.format(tag) + + def handle_starttag(self, tag, attrs): + # Handle tags that should always be empty and do not specify a closing tag + if tag in self.empty_tags: + self.handle_startendtag(tag, attrs) + return + + if self.md.is_block_level(tag) and (self.intail or (self.at_line_start() and not self.inraw)): + # Started a new raw block. Prepare stack. + self.inraw = True + self.cleandoc.append('\n') + + text = self.get_starttag_text() + if self.inraw: + self.stack.append(tag) + self._cache.append(text) + else: + self.cleandoc.append(text) + if tag in self.CDATA_CONTENT_ELEMENTS: + # This is presumably a standalone tag in a code span (see #1036). + self.clear_cdata_mode() + + def handle_endtag(self, tag): + text = self.get_endtag_text(tag) + + if self.inraw: + self._cache.append(text) + if tag in self.stack: + # Remove tag from stack + while self.stack: + if self.stack.pop() == tag: + break + if len(self.stack) == 0: + # End of raw block. + if blank_line_re.match(self.rawdata[self.line_offset + self.offset + len(text):]): + # Preserve blank line and end of raw block. + self._cache.append('\n') + else: + # More content exists after endtag. + self.intail = True + # Reset stack. + self.inraw = False + self.cleandoc.append(self.md.htmlStash.store(''.join(self._cache))) + # Insert blank line between this and next line. + self.cleandoc.append('\n\n') + self._cache = [] + else: + self.cleandoc.append(text) + + def handle_data(self, data): + if self.intail and '\n' in data: + self.intail = False + if self.inraw: + self._cache.append(data) + else: + self.cleandoc.append(data) + + def handle_empty_tag(self, data, is_block): + """ Handle empty tags (`<data>`). """ + if self.inraw or self.intail: + # Append this to the existing raw block + self._cache.append(data) + elif self.at_line_start() and is_block: + # Handle this as a standalone raw block + if blank_line_re.match(self.rawdata[self.line_offset + self.offset + len(data):]): + # Preserve blank line after tag in raw block. + data += '\n' + else: + # More content exists after tag. + self.intail = True + item = self.cleandoc[-1] if self.cleandoc else '' + # If we only have one newline before block element, add another + if not item.endswith('\n\n') and item.endswith('\n'): + self.cleandoc.append('\n') + self.cleandoc.append(self.md.htmlStash.store(data)) + # Insert blank line between this and next line. + self.cleandoc.append('\n\n') + else: + self.cleandoc.append(data) + + def handle_startendtag(self, tag, attrs): + self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag)) + + def handle_charref(self, name): + self.handle_empty_tag('&#{};'.format(name), is_block=False) + + def handle_entityref(self, name): + self.handle_empty_tag('&{};'.format(name), is_block=False) + + def handle_comment(self, data): + self.handle_empty_tag('<!--{}-->'.format(data), is_block=True) + + def handle_decl(self, data): + self.handle_empty_tag('<!{}>'.format(data), is_block=True) + + def handle_pi(self, data): + self.handle_empty_tag('<?{}?>'.format(data), is_block=True) + + def unknown_decl(self, data): + end = ']]>' if data.startswith('CDATA[') else ']>' + self.handle_empty_tag('<![{}{}'.format(data, end), is_block=True) + + def parse_pi(self, i): + if self.at_line_start() or self.intail: + return super().parse_pi(i) + # This is not the beginning of a raw block so treat as plain data + # and avoid consuming any tags which may follow (see #1066). + self.handle_data('<?') + return i + 2 + + def parse_html_declaration(self, i): + if self.at_line_start() or self.intail: + return super().parse_html_declaration(i) + # This is not the beginning of a raw block so treat as plain data + # and avoid consuming any tags which may follow (see #1066). + self.handle_data('<!') + return i + 2 + + # The rest has been copied from base class in standard lib to address #1036. + # As __startag_text is private, all references to it must be in this subclass. + # The last few lines of parse_starttag are reversed so that handle_starttag + # can override cdata_mode in certain situations (in a code span). + __starttag_text = None + + def get_starttag_text(self): + """Return full source of start tag: '<...>'.""" + return self.__starttag_text + + def parse_starttag(self, i): # pragma: no cover + self.__starttag_text = None + endpos = self.check_for_whole_start_tag(i) + if endpos < 0: + return endpos + rawdata = self.rawdata + self.__starttag_text = rawdata[i:endpos] + + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + match = htmlparser.tagfind_tolerant.match(rawdata, i+1) + assert match, 'unexpected call to parse_starttag()' + k = match.end() + self.lasttag = tag = match.group(1).lower() + while k < endpos: + m = htmlparser.attrfind_tolerant.match(rawdata, k) + if not m: + break + attrname, rest, attrvalue = m.group(1, 2, 3) + if not rest: + attrvalue = None + elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ + attrvalue[:1] == '"' == attrvalue[-1:]: # noqa: E127 + attrvalue = attrvalue[1:-1] + if attrvalue: + attrvalue = htmlparser.unescape(attrvalue) + attrs.append((attrname.lower(), attrvalue)) + k = m.end() + + end = rawdata[k:endpos].strip() + if end not in (">", "/>"): + lineno, offset = self.getpos() + if "\n" in self.__starttag_text: + lineno = lineno + self.__starttag_text.count("\n") + offset = len(self.__starttag_text) \ + - self.__starttag_text.rfind("\n") # noqa: E127 + else: + offset = offset + len(self.__starttag_text) + self.handle_data(rawdata[i:endpos]) + return endpos + if end.endswith('/>'): + # XHTML-style empty tag: <span attr="value" /> + self.handle_startendtag(tag, attrs) + else: + # *** set cdata_mode first so we can override it in handle_starttag (see #1036) *** + if tag in self.CDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag) + self.handle_starttag(tag, attrs) + return endpos diff --git a/Libs/markdown/inlinepatterns.py b/Libs/markdown/inlinepatterns.py new file mode 100644 index 0000000..eb74f49 --- /dev/null +++ b/Libs/markdown/inlinepatterns.py @@ -0,0 +1,892 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). + +INLINE PATTERNS +============================================================================= + +Inline patterns such as *emphasis* are handled by means of auxiliary +objects, one per pattern. Pattern objects must be instances of classes +that extend markdown.Pattern. Each pattern object uses a single regular +expression and needs support the following methods: + + pattern.getCompiledRegExp() # returns a regular expression + + pattern.handleMatch(m) # takes a match object and returns + # an ElementTree element or just plain text + +All of python markdown's built-in patterns subclass from Pattern, +but you can add additional patterns that don't. + +Also note that all the regular expressions used by inline must +capture the whole block. For this reason, they all start with +'^(.*)' and end with '(.*)!'. In case with built-in expression +Pattern takes care of adding the "^(.*)" and "(.*)!". + +Finally, the order in which regular expressions are applied is very +important - e.g. if we first replace http://.../ links with <a> tags +and _then_ try to replace inline html, we would end up with a mess. +So, we apply the expressions in the following order: + +* escape and backticks have to go before everything else, so + that we can preempt any markdown patterns by escaping them. + +* then we handle auto-links (must be done before inline html) + +* then we handle inline HTML. At this point we will simply + replace all inline HTML strings with a placeholder and add + the actual HTML to a hash. + +* then inline images (must be done before links) + +* then bracketed links, first regular then reference-style + +* finally we apply strong and emphasis +""" + +from . import util +from collections import namedtuple +import re +import xml.etree.ElementTree as etree +try: # pragma: no cover + from html import entities +except ImportError: # pragma: no cover + import htmlentitydefs as entities + + +def build_inlinepatterns(md, **kwargs): + """ Build the default set of inline patterns for Markdown. """ + inlinePatterns = util.Registry() + inlinePatterns.register(BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190) + inlinePatterns.register(EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180) + inlinePatterns.register(ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170) + inlinePatterns.register(LinkInlineProcessor(LINK_RE, md), 'link', 160) + inlinePatterns.register(ImageInlineProcessor(IMAGE_LINK_RE, md), 'image_link', 150) + inlinePatterns.register( + ImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'image_reference', 140 + ) + inlinePatterns.register( + ShortReferenceInlineProcessor(REFERENCE_RE, md), 'short_reference', 130 + ) + inlinePatterns.register( + ShortImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'short_image_ref', 125 + ) + inlinePatterns.register(AutolinkInlineProcessor(AUTOLINK_RE, md), 'autolink', 120) + inlinePatterns.register(AutomailInlineProcessor(AUTOMAIL_RE, md), 'automail', 110) + inlinePatterns.register(SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br'), 'linebreak', 100) + inlinePatterns.register(HtmlInlineProcessor(HTML_RE, md), 'html', 90) + inlinePatterns.register(HtmlInlineProcessor(ENTITY_RE, md), 'entity', 80) + inlinePatterns.register(SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 70) + inlinePatterns.register(AsteriskProcessor(r'\*'), 'em_strong', 60) + inlinePatterns.register(UnderscoreProcessor(r'_'), 'em_strong2', 50) + return inlinePatterns + + +""" +The actual regular expressions for patterns +----------------------------------------------------------------------------- +""" + +NOIMG = r'(?<!\!)' + +# `e=f()` or ``e=f("`")`` +BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))' + +# \< +ESCAPE_RE = r'\\(.)' + +# *emphasis* +EMPHASIS_RE = r'(\*)([^\*]+)\1' + +# **strong** +STRONG_RE = r'(\*{2})(.+?)\1' + +# __smart__strong__ +SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)' + +# _smart_emphasis_ +SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)' + +# __strong _em__ +SMART_STRONG_EM_RE = r'(?<!\w)(\_)\1(?!\1)(.+?)(?<!\w)\1(?!\1)(.+?)\1{3}(?!\w)' + +# ***strongem*** or ***em*strong** +EM_STRONG_RE = r'(\*)\1{2}(.+?)\1(.*?)\1{2}' + +# ___strongem___ or ___em_strong__ +EM_STRONG2_RE = r'(_)\1{2}(.+?)\1(.*?)\1{2}' + +# ***strong**em* +STRONG_EM_RE = r'(\*)\1{2}(.+?)\1{2}(.*?)\1' + +# ___strong__em_ +STRONG_EM2_RE = r'(_)\1{2}(.+?)\1{2}(.*?)\1' + +# **strong*em*** +STRONG_EM3_RE = r'(\*)\1(?!\1)([^*]+?)\1(?!\1)(.+?)\1{3}' + +# [text](url) or [text](<url>) or [text](url "title") +LINK_RE = NOIMG + r'\[' + +# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>) +IMAGE_LINK_RE = r'\!\[' + +# [Google][3] +REFERENCE_RE = LINK_RE + +# ![alt text][2] +IMAGE_REFERENCE_RE = IMAGE_LINK_RE + +# stand-alone * or _ +NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))' + +# <http://www.123.com> +AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>' + +# <me@example.com> +AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>' + +# <...> +HTML_RE = r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|!--(?:(?!<!--|-->).)*--)>)' + +# "&" (decimal) or "&" (hex) or "&" (named) +ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)' + +# two spaces at end of line +LINE_BREAK_RE = r' \n' + + +def dequote(string): + """Remove quotes from around a string.""" + if ((string.startswith('"') and string.endswith('"')) or + (string.startswith("'") and string.endswith("'"))): + return string[1:-1] + else: + return string + + +class EmStrongItem(namedtuple('EmStrongItem', ['pattern', 'builder', 'tags'])): + """Emphasis/strong pattern item.""" + + +""" +The pattern classes +----------------------------------------------------------------------------- +""" + + +class Pattern: # pragma: no cover + """Base class that inline patterns subclass. """ + + ANCESTOR_EXCLUDES = tuple() + + def __init__(self, pattern, md=None): + """ + Create an instant of an inline pattern. + + Keyword arguments: + + * pattern: A regular expression that matches a pattern + + """ + self.pattern = pattern + self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern, + re.DOTALL | re.UNICODE) + + self.md = md + + @property + @util.deprecated("Use 'md' instead.") + def markdown(self): + # TODO: remove this later + return self.md + + def getCompiledRegExp(self): + """ Return a compiled regular expression. """ + return self.compiled_re + + def handleMatch(self, m): + """Return a ElementTree element from the given match. + + Subclasses should override this method. + + Keyword arguments: + + * m: A re match object containing a match of the pattern. + + """ + pass # pragma: no cover + + def type(self): + """ Return class name, to define pattern type """ + return self.__class__.__name__ + + def unescape(self, text): + """ Return unescaped text given text with an inline placeholder. """ + try: + stash = self.md.treeprocessors['inline'].stashed_nodes + except KeyError: # pragma: no cover + return text + + def get_stash(m): + id = m.group(1) + if id in stash: + value = stash.get(id) + if isinstance(value, str): + return value + else: + # An etree Element - return text content only + return ''.join(value.itertext()) + return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) + + +class InlineProcessor(Pattern): + """ + Base class that inline patterns subclass. + + This is the newer style inline processor that uses a more + efficient and flexible search approach. + """ + + def __init__(self, pattern, md=None): + """ + Create an instant of an inline pattern. + + Keyword arguments: + + * pattern: A regular expression that matches a pattern + + """ + self.pattern = pattern + self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE) + + # Api for Markdown to pass safe_mode into instance + self.safe_mode = False + self.md = md + + def handleMatch(self, m, data): + """Return a ElementTree element from the given match and the + start and end index of the matched text. + + If `start` and/or `end` are returned as `None`, it will be + assumed that the processor did not find a valid region of text. + + Subclasses should override this method. + + Keyword arguments: + + * m: A re match object containing a match of the pattern. + * data: The buffer current under analysis + + Returns: + + * el: The ElementTree element, text or None. + * start: The start of the region that has been matched or None. + * end: The end of the region that has been matched or None. + + """ + pass # pragma: no cover + + +class SimpleTextPattern(Pattern): # pragma: no cover + """ Return a simple text of group(2) of a Pattern. """ + def handleMatch(self, m): + return m.group(2) + + +class SimpleTextInlineProcessor(InlineProcessor): + """ Return a simple text of group(1) of a Pattern. """ + def handleMatch(self, m, data): + return m.group(1), m.start(0), m.end(0) + + +class EscapeInlineProcessor(InlineProcessor): + """ Return an escaped character. """ + + def handleMatch(self, m, data): + char = m.group(1) + if char in self.md.ESCAPED_CHARS: + return '{}{}{}'.format(util.STX, ord(char), util.ETX), m.start(0), m.end(0) + else: + return None, m.start(0), m.end(0) + + +class SimpleTagPattern(Pattern): # pragma: no cover + """ + Return element of type `tag` with a text attribute of group(3) + of a Pattern. + + """ + def __init__(self, pattern, tag): + Pattern.__init__(self, pattern) + self.tag = tag + + def handleMatch(self, m): + el = etree.Element(self.tag) + el.text = m.group(3) + return el + + +class SimpleTagInlineProcessor(InlineProcessor): + """ + Return element of type `tag` with a text attribute of group(2) + of a Pattern. + + """ + def __init__(self, pattern, tag): + InlineProcessor.__init__(self, pattern) + self.tag = tag + + def handleMatch(self, m, data): # pragma: no cover + el = etree.Element(self.tag) + el.text = m.group(2) + return el, m.start(0), m.end(0) + + +class SubstituteTagPattern(SimpleTagPattern): # pragma: no cover + """ Return an element of type `tag` with no children. """ + def handleMatch(self, m): + return etree.Element(self.tag) + + +class SubstituteTagInlineProcessor(SimpleTagInlineProcessor): + """ Return an element of type `tag` with no children. """ + def handleMatch(self, m, data): + return etree.Element(self.tag), m.start(0), m.end(0) + + +class BacktickInlineProcessor(InlineProcessor): + """ Return a `<code>` element containing the matching text. """ + def __init__(self, pattern): + InlineProcessor.__init__(self, pattern) + self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX) + self.tag = 'code' + + def handleMatch(self, m, data): + if m.group(3): + el = etree.Element(self.tag) + el.text = util.AtomicString(util.code_escape(m.group(3).strip())) + return el, m.start(0), m.end(0) + else: + return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), m.start(0), m.end(0) + + +class DoubleTagPattern(SimpleTagPattern): # pragma: no cover + """Return a ElementTree element nested in tag2 nested in tag1. + + Useful for strong emphasis etc. + + """ + def handleMatch(self, m): + tag1, tag2 = self.tag.split(",") + el1 = etree.Element(tag1) + el2 = etree.SubElement(el1, tag2) + el2.text = m.group(3) + if len(m.groups()) == 5: + el2.tail = m.group(4) + return el1 + + +class DoubleTagInlineProcessor(SimpleTagInlineProcessor): + """Return a ElementTree element nested in tag2 nested in tag1. + + Useful for strong emphasis etc. + + """ + def handleMatch(self, m, data): # pragma: no cover + tag1, tag2 = self.tag.split(",") + el1 = etree.Element(tag1) + el2 = etree.SubElement(el1, tag2) + el2.text = m.group(2) + if len(m.groups()) == 3: + el2.tail = m.group(3) + return el1, m.start(0), m.end(0) + + +class HtmlInlineProcessor(InlineProcessor): + """ Store raw inline html and return a placeholder. """ + def handleMatch(self, m, data): + rawhtml = self.unescape(m.group(1)) + place_holder = self.md.htmlStash.store(rawhtml) + return place_holder, m.start(0), m.end(0) + + def unescape(self, text): + """ Return unescaped text given text with an inline placeholder. """ + try: + stash = self.md.treeprocessors['inline'].stashed_nodes + except KeyError: # pragma: no cover + return text + + def get_stash(m): + id = m.group(1) + value = stash.get(id) + if value is not None: + try: + return self.md.serializer(value) + except Exception: + return r'\%s' % value + + return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) + + +class AsteriskProcessor(InlineProcessor): + """Emphasis processor for handling strong and em matches inside asterisks.""" + + PATTERNS = [ + EmStrongItem(re.compile(EM_STRONG_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'), + EmStrongItem(re.compile(STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'), + EmStrongItem(re.compile(STRONG_EM3_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'), + EmStrongItem(re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'), + EmStrongItem(re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em') + ] + + def build_single(self, m, tag, idx): + """Return single tag.""" + el1 = etree.Element(tag) + text = m.group(2) + self.parse_sub_patterns(text, el1, None, idx) + return el1 + + def build_double(self, m, tags, idx): + """Return double tag.""" + + tag1, tag2 = tags.split(",") + el1 = etree.Element(tag1) + el2 = etree.Element(tag2) + text = m.group(2) + self.parse_sub_patterns(text, el2, None, idx) + el1.append(el2) + if len(m.groups()) == 3: + text = m.group(3) + self.parse_sub_patterns(text, el1, el2, idx) + return el1 + + def build_double2(self, m, tags, idx): + """Return double tags (variant 2): `<strong>text <em>text</em></strong>`.""" + + tag1, tag2 = tags.split(",") + el1 = etree.Element(tag1) + el2 = etree.Element(tag2) + text = m.group(2) + self.parse_sub_patterns(text, el1, None, idx) + text = m.group(3) + el1.append(el2) + self.parse_sub_patterns(text, el2, None, idx) + return el1 + + def parse_sub_patterns(self, data, parent, last, idx): + """ + Parses sub patterns. + + `data` (`str`): + text to evaluate. + + `parent` (`etree.Element`): + Parent to attach text and sub elements to. + + `last` (`etree.Element`): + Last appended child to parent. Can also be None if parent has no children. + + `idx` (`int`): + Current pattern index that was used to evaluate the parent. + + """ + + offset = 0 + pos = 0 + + length = len(data) + while pos < length: + # Find the start of potential emphasis or strong tokens + if self.compiled_re.match(data, pos): + matched = False + # See if the we can match an emphasis/strong pattern + for index, item in enumerate(self.PATTERNS): + # Only evaluate patterns that are after what was used on the parent + if index <= idx: + continue + m = item.pattern.match(data, pos) + if m: + # Append child nodes to parent + # Text nodes should be appended to the last + # child if present, and if not, it should + # be added as the parent's text node. + text = data[offset:m.start(0)] + if text: + if last is not None: + last.tail = text + else: + parent.text = text + el = self.build_element(m, item.builder, item.tags, index) + parent.append(el) + last = el + # Move our position past the matched hunk + offset = pos = m.end(0) + matched = True + if not matched: + # We matched nothing, move on to the next character + pos += 1 + else: + # Increment position as no potential emphasis start was found. + pos += 1 + + # Append any leftover text as a text node. + text = data[offset:] + if text: + if last is not None: + last.tail = text + else: + parent.text = text + + def build_element(self, m, builder, tags, index): + """Element builder.""" + + if builder == 'double2': + return self.build_double2(m, tags, index) + elif builder == 'double': + return self.build_double(m, tags, index) + else: + return self.build_single(m, tags, index) + + def handleMatch(self, m, data): + """Parse patterns.""" + + el = None + start = None + end = None + + for index, item in enumerate(self.PATTERNS): + m1 = item.pattern.match(data, m.start(0)) + if m1: + start = m1.start(0) + end = m1.end(0) + el = self.build_element(m1, item.builder, item.tags, index) + break + return el, start, end + + +class UnderscoreProcessor(AsteriskProcessor): + """Emphasis processor for handling strong and em matches inside underscores.""" + + PATTERNS = [ + EmStrongItem(re.compile(EM_STRONG2_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'), + EmStrongItem(re.compile(STRONG_EM2_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'), + EmStrongItem(re.compile(SMART_STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'), + EmStrongItem(re.compile(SMART_STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'), + EmStrongItem(re.compile(SMART_EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em') + ] + + +class LinkInlineProcessor(InlineProcessor): + """ Return a link element from the given match. """ + RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE) + RE_TITLE_CLEAN = re.compile(r'\s') + + def handleMatch(self, m, data): + text, index, handled = self.getText(data, m.end(0)) + + if not handled: + return None, None, None + + href, title, index, handled = self.getLink(data, index) + if not handled: + return None, None, None + + el = etree.Element("a") + el.text = text + + el.set("href", href) + + if title is not None: + el.set("title", title) + + return el, m.start(0), index + + def getLink(self, data, index): + """Parse data between `()` of `[Text]()` allowing recursive `()`. """ + + href = '' + title = None + handled = False + + m = self.RE_LINK.match(data, pos=index) + if m and m.group(1): + # Matches [Text](<link> "title") + href = m.group(1)[1:-1].strip() + if m.group(2): + title = m.group(2)[1:-1] + index = m.end(0) + handled = True + elif m: + # Track bracket nesting and index in string + bracket_count = 1 + backtrack_count = 1 + start_index = m.end() + index = start_index + last_bracket = -1 + + # Primary (first found) quote tracking. + quote = None + start_quote = -1 + exit_quote = -1 + ignore_matches = False + + # Secondary (second found) quote tracking. + alt_quote = None + start_alt_quote = -1 + exit_alt_quote = -1 + + # Track last character + last = '' + + for pos in range(index, len(data)): + c = data[pos] + if c == '(': + # Count nested ( + # Don't increment the bracket count if we are sure we're in a title. + if not ignore_matches: + bracket_count += 1 + elif backtrack_count > 0: + backtrack_count -= 1 + elif c == ')': + # Match nested ) to ( + # Don't decrement if we are sure we are in a title that is unclosed. + if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)): + bracket_count = 0 + elif not ignore_matches: + bracket_count -= 1 + elif backtrack_count > 0: + backtrack_count -= 1 + # We've found our backup end location if the title doesn't resolve. + if backtrack_count == 0: + last_bracket = index + 1 + + elif c in ("'", '"'): + # Quote has started + if not quote: + # We'll assume we are now in a title. + # Brackets are quoted, so no need to match them (except for the final one). + ignore_matches = True + backtrack_count = bracket_count + bracket_count = 1 + start_quote = index + 1 + quote = c + # Secondary quote (in case the first doesn't resolve): [text](link'"title") + elif c != quote and not alt_quote: + start_alt_quote = index + 1 + alt_quote = c + # Update primary quote match + elif c == quote: + exit_quote = index + 1 + # Update secondary quote match + elif alt_quote and c == alt_quote: + exit_alt_quote = index + 1 + + index += 1 + + # Link is closed, so let's break out of the loop + if bracket_count == 0: + # Get the title if we closed a title string right before link closed + if exit_quote >= 0 and quote == last: + href = data[start_index:start_quote - 1] + title = ''.join(data[start_quote:exit_quote - 1]) + elif exit_alt_quote >= 0 and alt_quote == last: + href = data[start_index:start_alt_quote - 1] + title = ''.join(data[start_alt_quote:exit_alt_quote - 1]) + else: + href = data[start_index:index - 1] + break + + if c != ' ': + last = c + + # We have a scenario: [test](link"notitle) + # When we enter a string, we stop tracking bracket resolution in the main counter, + # but we do keep a backup counter up until we discover where we might resolve all brackets + # if the title string fails to resolve. + if bracket_count != 0 and backtrack_count == 0: + href = data[start_index:last_bracket - 1] + index = last_bracket + bracket_count = 0 + + handled = bracket_count == 0 + + if title is not None: + title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip()))) + + href = self.unescape(href).strip() + + return href, title, index, handled + + def getText(self, data, index): + """Parse the content between `[]` of the start of an image or link + resolving nested square brackets. + + """ + bracket_count = 1 + text = [] + for pos in range(index, len(data)): + c = data[pos] + if c == ']': + bracket_count -= 1 + elif c == '[': + bracket_count += 1 + index += 1 + if bracket_count == 0: + break + text.append(c) + return ''.join(text), index, bracket_count == 0 + + +class ImageInlineProcessor(LinkInlineProcessor): + """ Return a img element from the given match. """ + + def handleMatch(self, m, data): + text, index, handled = self.getText(data, m.end(0)) + if not handled: + return None, None, None + + src, title, index, handled = self.getLink(data, index) + if not handled: + return None, None, None + + el = etree.Element("img") + + el.set("src", src) + + if title is not None: + el.set("title", title) + + el.set('alt', self.unescape(text)) + return el, m.start(0), index + + +class ReferenceInlineProcessor(LinkInlineProcessor): + """ Match to a stored reference and return link element. """ + NEWLINE_CLEANUP_RE = re.compile(r'\s+', re.MULTILINE) + + RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE) + + def handleMatch(self, m, data): + text, index, handled = self.getText(data, m.end(0)) + if not handled: + return None, None, None + + id, end, handled = self.evalId(data, index, text) + if not handled: + return None, None, None + + # Clean up linebreaks in id + id = self.NEWLINE_CLEANUP_RE.sub(' ', id) + if id not in self.md.references: # ignore undefined refs + return None, m.start(0), end + + href, title = self.md.references[id] + + return self.makeTag(href, title, text), m.start(0), end + + def evalId(self, data, index, text): + """ + Evaluate the id portion of [ref][id]. + + If [ref][] use [ref]. + """ + m = self.RE_LINK.match(data, pos=index) + if not m: + return None, index, False + else: + id = m.group(1).lower() + end = m.end(0) + if not id: + id = text.lower() + return id, end, True + + def makeTag(self, href, title, text): + el = etree.Element('a') + + el.set('href', href) + if title: + el.set('title', title) + + el.text = text + return el + + +class ShortReferenceInlineProcessor(ReferenceInlineProcessor): + """Short form of reference: [google]. """ + def evalId(self, data, index, text): + """Evaluate the id from of [ref] """ + + return text.lower(), index, True + + +class ImageReferenceInlineProcessor(ReferenceInlineProcessor): + """ Match to a stored reference and return img element. """ + def makeTag(self, href, title, text): + el = etree.Element("img") + el.set("src", href) + if title: + el.set("title", title) + el.set("alt", self.unescape(text)) + return el + + +class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor): + """ Short form of inage reference: ![ref]. """ + def evalId(self, data, index, text): + """Evaluate the id from of [ref] """ + + return text.lower(), index, True + + +class AutolinkInlineProcessor(InlineProcessor): + """ Return a link Element given an autolink (`<http://example/com>`). """ + def handleMatch(self, m, data): + el = etree.Element("a") + el.set('href', self.unescape(m.group(1))) + el.text = util.AtomicString(m.group(1)) + return el, m.start(0), m.end(0) + + +class AutomailInlineProcessor(InlineProcessor): + """ + Return a mailto link Element given an automail link (`<foo@example.com>`). + """ + def handleMatch(self, m, data): + el = etree.Element('a') + email = self.unescape(m.group(1)) + if email.startswith("mailto:"): + email = email[len("mailto:"):] + + def codepoint2name(code): + """Return entity definition by code, or the code if not defined.""" + entity = entities.codepoint2name.get(code) + if entity: + return "{}{};".format(util.AMP_SUBSTITUTE, entity) + else: + return "%s#%d;" % (util.AMP_SUBSTITUTE, code) + + letters = [codepoint2name(ord(letter)) for letter in email] + el.text = util.AtomicString(''.join(letters)) + + mailto = "mailto:" + email + mailto = "".join([util.AMP_SUBSTITUTE + '#%d;' % + ord(letter) for letter in mailto]) + el.set('href', mailto) + return el, m.start(0), m.end(0) diff --git a/Libs/markdown/pep562.py b/Libs/markdown/pep562.py new file mode 100644 index 0000000..b130d3b --- /dev/null +++ b/Libs/markdown/pep562.py @@ -0,0 +1,245 @@ +""" +Backport of PEP 562. + +https://pypi.org/search/?q=pep562 + +Licensed under MIT +Copyright (c) 2018 Isaac Muse <isaacmuse@gmail.com> + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF +CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. +""" +import sys +from collections import namedtuple +import re + +__all__ = ('Pep562',) + +RE_VER = re.compile( + r'''(?x) + (?P<major>\d+)(?:\.(?P<minor>\d+))?(?:\.(?P<micro>\d+))? + (?:(?P<type>a|b|rc)(?P<pre>\d+))? + (?:\.post(?P<post>\d+))? + (?:\.dev(?P<dev>\d+))? + ''' +) + +REL_MAP = { + ".dev": "", + ".dev-alpha": "a", + ".dev-beta": "b", + ".dev-candidate": "rc", + "alpha": "a", + "beta": "b", + "candidate": "rc", + "final": "" +} + +DEV_STATUS = { + ".dev": "2 - Pre-Alpha", + ".dev-alpha": "2 - Pre-Alpha", + ".dev-beta": "2 - Pre-Alpha", + ".dev-candidate": "2 - Pre-Alpha", + "alpha": "3 - Alpha", + "beta": "4 - Beta", + "candidate": "4 - Beta", + "final": "5 - Production/Stable" +} + +PRE_REL_MAP = {"a": 'alpha', "b": 'beta', "rc": 'candidate'} + + +class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])): + """ + Get the version (PEP 440). + + A biased approach to the PEP 440 semantic version. + + Provides a tuple structure which is sorted for comparisons `v1 > v2` etc. + (major, minor, micro, release type, pre-release build, post-release build, development release build) + Release types are named in is such a way they are comparable with ease. + Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get + development status for setup files. + + How it works (currently): + + - You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`. + - To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`. + The dot is used to ensure all development specifiers are sorted before `alpha`. + You can specify a `dev` number for development builds, but do not have to as implicit development releases + are allowed. + - You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not + allow implicit prereleases. + - You can optionally set `post` to a value greater than zero to make the build a post release. While post releases + are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be + noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically + does not allow implicit post releases. + - It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`. + + Acceptable version releases: + + ``` + Version(1, 0, 0, "final") 1.0 + Version(1, 2, 0, "final") 1.2 + Version(1, 2, 3, "final") 1.2.3 + Version(1, 2, 0, ".dev-alpha", pre=4) 1.2a4 + Version(1, 2, 0, ".dev-beta", pre=4) 1.2b4 + Version(1, 2, 0, ".dev-candidate", pre=4) 1.2rc4 + Version(1, 2, 0, "final", post=1) 1.2.post1 + Version(1, 2, 3, ".dev") 1.2.3.dev0 + Version(1, 2, 3, ".dev", dev=1) 1.2.3.dev1 + ``` + + """ + + def __new__(cls, major, minor, micro, release="final", pre=0, post=0, dev=0): + """Validate version info.""" + + # Ensure all parts are positive integers. + for value in (major, minor, micro, pre, post): + if not (isinstance(value, int) and value >= 0): + raise ValueError("All version parts except 'release' should be integers.") + + if release not in REL_MAP: + raise ValueError("'{}' is not a valid release type.".format(release)) + + # Ensure valid pre-release (we do not allow implicit pre-releases). + if ".dev-candidate" < release < "final": + if pre == 0: + raise ValueError("Implicit pre-releases not allowed.") + elif dev: + raise ValueError("Version is not a development release.") + elif post: + raise ValueError("Post-releases are not allowed with pre-releases.") + + # Ensure valid development or development/pre release + elif release < "alpha": + if release > ".dev" and pre == 0: + raise ValueError("Implicit pre-release not allowed.") + elif post: + raise ValueError("Post-releases are not allowed with pre-releases.") + + # Ensure a valid normal release + else: + if pre: + raise ValueError("Version is not a pre-release.") + elif dev: + raise ValueError("Version is not a development release.") + + return super().__new__(cls, major, minor, micro, release, pre, post, dev) + + def _is_pre(self): + """Is prerelease.""" + + return self.pre > 0 + + def _is_dev(self): + """Is development.""" + + return bool(self.release < "alpha") + + def _is_post(self): + """Is post.""" + + return self.post > 0 + + def _get_dev_status(self): # pragma: no cover + """Get development status string.""" + + return DEV_STATUS[self.release] + + def _get_canonical(self): + """Get the canonical output string.""" + + # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.. + if self.micro == 0: + ver = "{}.{}".format(self.major, self.minor) + else: + ver = "{}.{}.{}".format(self.major, self.minor, self.micro) + if self._is_pre(): + ver += '{}{}'.format(REL_MAP[self.release], self.pre) + if self._is_post(): + ver += ".post{}".format(self.post) + if self._is_dev(): + ver += ".dev{}".format(self.dev) + + return ver + + +def parse_version(ver, pre=False): + """Parse version into a comparable Version tuple.""" + + m = RE_VER.match(ver) + + # Handle major, minor, micro + major = int(m.group('major')) + minor = int(m.group('minor')) if m.group('minor') else 0 + micro = int(m.group('micro')) if m.group('micro') else 0 + + # Handle pre releases + if m.group('type'): + release = PRE_REL_MAP[m.group('type')] + pre = int(m.group('pre')) + else: + release = "final" + pre = 0 + + # Handle development releases + dev = m.group('dev') if m.group('dev') else 0 + if m.group('dev'): + dev = int(m.group('dev')) + release = '.dev-' + release if pre else '.dev' + else: + dev = 0 + + # Handle post + post = int(m.group('post')) if m.group('post') else 0 + + return Version(major, minor, micro, release, pre, post, dev) + + +class Pep562: + """ + Backport of PEP 562 <https://pypi.org/search/?q=pep562>. + + Wraps the module in a class that exposes the mechanics to override `__dir__` and `__getattr__`. + The given module will be searched for overrides of `__dir__` and `__getattr__` and use them when needed. + """ + + def __init__(self, name): + """Acquire `__getattr__` and `__dir__`, but only replace module for versions less than Python 3.7.""" + + self._module = sys.modules[name] + self._get_attr = getattr(self._module, '__getattr__', None) + self._get_dir = getattr(self._module, '__dir__', None) + sys.modules[name] = self + + def __dir__(self): + """Return the overridden `dir` if one was provided, else apply `dir` to the module.""" + + return self._get_dir() if self._get_dir else dir(self._module) + + def __getattr__(self, name): + """Attempt to retrieve the attribute from the module, and if missing, use the overridden function if present.""" + + try: + return getattr(self._module, name) + except AttributeError: + if self._get_attr: + return self._get_attr(name) + raise + + +__version_info__ = Version(1, 0, 0, "final") +__version__ = __version_info__._get_canonical() diff --git a/Libs/markdown/postprocessors.py b/Libs/markdown/postprocessors.py new file mode 100644 index 0000000..f4fb924 --- /dev/null +++ b/Libs/markdown/postprocessors.py @@ -0,0 +1,134 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). + +POST-PROCESSORS +============================================================================= + +Markdown also allows post-processors, which are similar to preprocessors in +that they need to implement a "run" method. However, they are run after core +processing. + +""" + +from collections import OrderedDict +from . import util +import re + + +def build_postprocessors(md, **kwargs): + """ Build the default postprocessors for Markdown. """ + postprocessors = util.Registry() + postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30) + postprocessors.register(AndSubstitutePostprocessor(), 'amp_substitute', 20) + postprocessors.register(UnescapePostprocessor(), 'unescape', 10) + return postprocessors + + +class Postprocessor(util.Processor): + """ + Postprocessors are run after the ElementTree it converted back into text. + + Each Postprocessor implements a "run" method that takes a pointer to a + text string, modifies it as necessary and returns a text string. + + Postprocessors must extend markdown.Postprocessor. + + """ + + def run(self, text): + """ + Subclasses of Postprocessor should implement a `run` method, which + takes the html document as a single text string and returns a + (possibly modified) string. + + """ + pass # pragma: no cover + + +class RawHtmlPostprocessor(Postprocessor): + """ Restore raw html to the document. """ + + BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)') + + def run(self, text): + """ Iterate over html stash and restore html. """ + replacements = OrderedDict() + for i in range(self.md.htmlStash.html_counter): + html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) + if self.isblocklevel(html): + replacements["<p>{}</p>".format( + self.md.htmlStash.get_placeholder(i))] = html + replacements[self.md.htmlStash.get_placeholder(i)] = html + + def substitute_match(m): + key = m.group(0) + + if key not in replacements: + if key[3:-4] in replacements: + return f'<p>{ replacements[key[3:-4]] }</p>' + else: + return key + + return replacements[key] + + if replacements: + base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)' + pattern = re.compile(f'<p>{ base_placeholder }</p>|{ base_placeholder }') + processed_text = pattern.sub(substitute_match, text) + else: + return text + + if processed_text == text: + return processed_text + else: + return self.run(processed_text) + + def isblocklevel(self, html): + m = self.BLOCK_LEVEL_REGEX.match(html) + if m: + if m.group(1)[0] in ('!', '?', '@', '%'): + # Comment, php etc... + return True + return self.md.is_block_level(m.group(1)) + return False + + def stash_to_string(self, text): + """ Convert a stashed object to a string. """ + return str(text) + + +class AndSubstitutePostprocessor(Postprocessor): + """ Restore valid entities """ + + def run(self, text): + text = text.replace(util.AMP_SUBSTITUTE, "&") + return text + + +class UnescapePostprocessor(Postprocessor): + """ Restore escaped chars """ + + RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX)) + + def unescape(self, m): + return chr(int(m.group(1))) + + def run(self, text): + return self.RE.sub(self.unescape, text) diff --git a/Libs/markdown/preprocessors.py b/Libs/markdown/preprocessors.py new file mode 100644 index 0000000..e1023c5 --- /dev/null +++ b/Libs/markdown/preprocessors.py @@ -0,0 +1,82 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). + +PRE-PROCESSORS +============================================================================= + +Preprocessors work on source text before we start doing anything too +complicated. +""" + +from . import util +from .htmlparser import HTMLExtractor +import re + + +def build_preprocessors(md, **kwargs): + """ Build the default set of preprocessors used by Markdown. """ + preprocessors = util.Registry() + preprocessors.register(NormalizeWhitespace(md), 'normalize_whitespace', 30) + preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20) + return preprocessors + + +class Preprocessor(util.Processor): + """ + Preprocessors are run after the text is broken into lines. + + Each preprocessor implements a "run" method that takes a pointer to a + list of lines of the document, modifies it as necessary and returns + either the same pointer or a pointer to a new list. + + Preprocessors must extend markdown.Preprocessor. + + """ + def run(self, lines): + """ + Each subclass of Preprocessor should override the `run` method, which + takes the document as a list of strings split by newlines and returns + the (possibly modified) list of lines. + + """ + pass # pragma: no cover + + +class NormalizeWhitespace(Preprocessor): + """ Normalize whitespace for consistent parsing. """ + + def run(self, lines): + source = '\n'.join(lines) + source = source.replace(util.STX, "").replace(util.ETX, "") + source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n" + source = source.expandtabs(self.md.tab_length) + source = re.sub(r'(?<=\n) +\n', '\n', source) + return source.split('\n') + + +class HtmlBlockPreprocessor(Preprocessor): + """Remove html blocks from the text and store them for later retrieval.""" + + def run(self, lines): + source = '\n'.join(lines) + parser = HTMLExtractor(self.md) + parser.feed(source) + parser.close() + return ''.join(parser.cleandoc).split('\n') diff --git a/Libs/markdown/serializers.py b/Libs/markdown/serializers.py new file mode 100644 index 0000000..59bab18 --- /dev/null +++ b/Libs/markdown/serializers.py @@ -0,0 +1,189 @@ +# markdown/searializers.py +# +# Add x/html serialization to Elementree +# Taken from ElementTree 1.3 preview with slight modifications +# +# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# https://www.pythonware.com/ +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2007 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + + +from xml.etree.ElementTree import ProcessingInstruction +from xml.etree.ElementTree import Comment, ElementTree, QName +import re + +__all__ = ['to_html_string', 'to_xhtml_string'] + +HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", + "img", "input", "isindex", "link", "meta", "param") +RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I) + +try: + HTML_EMPTY = set(HTML_EMPTY) +except NameError: # pragma: no cover + pass + + +def _raise_serialization_error(text): # pragma: no cover + raise TypeError( + "cannot serialize {!r} (type {})".format(text, type(text).__name__) + ) + + +def _escape_cdata(text): + # escape character data + try: + # it's worth avoiding do-nothing calls for strings that are + # shorter than 500 character, or so. assume that's, by far, + # the most common case in most applications. + if "&" in text: + # Only replace & when not part of an entity + text = RE_AMP.sub('&', text) + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _escape_attrib(text): + # escape attribute value + try: + if "&" in text: + # Only replace & when not part of an entity + text = RE_AMP.sub('&', text) + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + if "\n" in text: + text = text.replace("\n", " ") + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _escape_attrib_html(text): + # escape attribute value + try: + if "&" in text: + # Only replace & when not part of an entity + text = RE_AMP.sub('&', text) + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + if "\"" in text: + text = text.replace("\"", """) + return text + except (TypeError, AttributeError): # pragma: no cover + _raise_serialization_error(text) + + +def _serialize_html(write, elem, format): + tag = elem.tag + text = elem.text + if tag is Comment: + write("<!--%s-->" % _escape_cdata(text)) + elif tag is ProcessingInstruction: + write("<?%s?>" % _escape_cdata(text)) + elif tag is None: + if text: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, format) + else: + namespace_uri = None + if isinstance(tag, QName): + # QNAME objects store their data as a string: `{uri}tag` + if tag.text[:1] == "{": + namespace_uri, tag = tag.text[1:].split("}", 1) + else: + raise ValueError('QName objects must define a tag.') + write("<" + tag) + items = elem.items() + if items: + items = sorted(items) # lexical order + for k, v in items: + if isinstance(k, QName): + # Assume a text only QName + k = k.text + if isinstance(v, QName): + # Assume a text only QName + v = v.text + else: + v = _escape_attrib_html(v) + if k == v and format == 'html': + # handle boolean attributes + write(" %s" % v) + else: + write(' {}="{}"'.format(k, v)) + if namespace_uri: + write(' xmlns="%s"' % (_escape_attrib(namespace_uri))) + if format == "xhtml" and tag.lower() in HTML_EMPTY: + write(" />") + else: + write(">") + if text: + if tag.lower() in ["script", "style"]: + write(text) + else: + write(_escape_cdata(text)) + for e in elem: + _serialize_html(write, e, format) + if tag.lower() not in HTML_EMPTY: + write("</" + tag + ">") + if elem.tail: + write(_escape_cdata(elem.tail)) + + +def _write_html(root, format="html"): + assert root is not None + data = [] + write = data.append + _serialize_html(write, root, format) + return "".join(data) + + +# -------------------------------------------------------------------- +# public functions + +def to_html_string(element): + return _write_html(ElementTree(element).getroot(), format="html") + + +def to_xhtml_string(element): + return _write_html(ElementTree(element).getroot(), format="xhtml") diff --git a/Libs/markdown/test_tools.py b/Libs/markdown/test_tools.py new file mode 100644 index 0000000..2ce0e74 --- /dev/null +++ b/Libs/markdown/test_tools.py @@ -0,0 +1,220 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import os +import sys +import unittest +import textwrap +from . import markdown, Markdown, util + +try: + import tidylib +except ImportError: + tidylib = None + +__all__ = ['TestCase', 'LegacyTestCase', 'Kwargs'] + + +class TestCase(unittest.TestCase): + """ + A unittest.TestCase subclass with helpers for testing Markdown output. + + Define `default_kwargs` as a dict of keywords to pass to Markdown for each + test. The defaults can be overridden on individual tests. + + The `assertMarkdownRenders` method accepts the source text, the expected + output, and any keywords to pass to Markdown. The `default_kwargs` are used + except where overridden by `kwargs`. The output and expected output are passed + to `TestCase.assertMultiLineEqual`. An AssertionError is raised with a diff + if the actual output does not equal the expected output. + + The `dedent` method is available to dedent triple-quoted strings if + necessary. + + In all other respects, behaves as unittest.TestCase. + """ + + default_kwargs = {} + + def assertMarkdownRenders(self, source, expected, expected_attrs=None, **kwargs): + """ + Test that source Markdown text renders to expected output with given keywords. + + `expected_attrs` accepts a dict. Each key should be the name of an attribute + on the `Markdown` instance and the value should be the expected value after + the source text is parsed by Markdown. After the expected output is tested, + the expected value for each attribute is compared against the actual + attribute of the `Markdown` instance using `TestCase.assertEqual`. + """ + + expected_attrs = expected_attrs or {} + kws = self.default_kwargs.copy() + kws.update(kwargs) + md = Markdown(**kws) + output = md.convert(source) + self.assertMultiLineEqual(output, expected) + for key, value in expected_attrs.items(): + self.assertEqual(getattr(md, key), value) + + def dedent(self, text): + """ + Dedent text. + """ + + # TODO: If/when actual output ends with a newline, then use: + # return textwrap.dedent(text.strip('/n')) + return textwrap.dedent(text).strip() + + +class recursionlimit: + """ + A context manager which temporarily modifies the Python recursion limit. + + The testing framework, coverage, etc. may add an arbitrary number of levels to the depth. To maintain consistency + in the tests, the current stack depth is determined when called, then added to the provided limit. + + Example usage: + + with recursionlimit(20): + # test code here + + See https://stackoverflow.com/a/50120316/866026 + """ + + def __init__(self, limit): + self.limit = util._get_stack_depth() + limit + self.old_limit = sys.getrecursionlimit() + + def __enter__(self): + sys.setrecursionlimit(self.limit) + + def __exit__(self, type, value, tb): + sys.setrecursionlimit(self.old_limit) + + +######################### +# Legacy Test Framework # +######################### + + +class Kwargs(dict): + """ A dict like class for holding keyword arguments. """ + pass + + +def _normalize_whitespace(text): + """ Normalize whitespace for a string of html using tidylib. """ + output, errors = tidylib.tidy_fragment(text, options={ + 'drop_empty_paras': 0, + 'fix_backslash': 0, + 'fix_bad_comments': 0, + 'fix_uri': 0, + 'join_styles': 0, + 'lower_literals': 0, + 'merge_divs': 0, + 'output_xhtml': 1, + 'quote_ampersand': 0, + 'newline': 'LF' + }) + return output + + +class LegacyTestMeta(type): + def __new__(cls, name, bases, dct): + + def generate_test(infile, outfile, normalize, kwargs): + def test(self): + with open(infile, encoding="utf-8") as f: + input = f.read() + with open(outfile, encoding="utf-8") as f: + # Normalize line endings + # (on Windows, git may have altered line endings). + expected = f.read().replace("\r\n", "\n") + output = markdown(input, **kwargs) + if tidylib and normalize: + try: + expected = _normalize_whitespace(expected) + output = _normalize_whitespace(output) + except OSError: + self.skipTest("Tidylib's c library not available.") + elif normalize: + self.skipTest('Tidylib not available.') + self.assertMultiLineEqual(output, expected) + return test + + location = dct.get('location', '') + exclude = dct.get('exclude', []) + normalize = dct.get('normalize', False) + input_ext = dct.get('input_ext', '.txt') + output_ext = dct.get('output_ext', '.html') + kwargs = dct.get('default_kwargs', Kwargs()) + + if os.path.isdir(location): + for file in os.listdir(location): + infile = os.path.join(location, file) + if os.path.isfile(infile): + tname, ext = os.path.splitext(file) + if ext == input_ext: + outfile = os.path.join(location, tname + output_ext) + tname = tname.replace(' ', '_').replace('-', '_') + kws = kwargs.copy() + if tname in dct: + kws.update(dct[tname]) + test_name = 'test_%s' % tname + if tname not in exclude: + dct[test_name] = generate_test(infile, outfile, normalize, kws) + else: + dct[test_name] = unittest.skip('Excluded')(lambda: None) + + return type.__new__(cls, name, bases, dct) + + +class LegacyTestCase(unittest.TestCase, metaclass=LegacyTestMeta): + """ + A `unittest.TestCase` subclass for running Markdown's legacy file-based tests. + + A subclass should define various properties which point to a directory of + text-based test files and define various behaviors/defaults for those tests. + The following properties are supported: + + location: A path to the directory of test files. An absolute path is preferred. + exclude: A list of tests to exclude. Each test name should comprise the filename + without an extension. + normalize: A boolean value indicating if the HTML should be normalized. + Default: `False`. + input_ext: A string containing the file extension of input files. Default: `.txt`. + ouput_ext: A string containing the file extension of expected output files. + Default: `html`. + default_kwargs: A `Kwargs` instance which stores the default set of keyword + arguments for all test files in the directory. + + In addition, properties can be defined for each individual set of test files within + the directory. The property should be given the name of the file without the file + extension. Any spaces and dashes in the filename should be replaced with + underscores. The value of the property should be a `Kwargs` instance which + contains the keyword arguments that should be passed to `Markdown` for that + test file. The keyword arguments will "update" the `default_kwargs`. + + When the class instance is created, it will walk the given directory and create + a separate unitttest for each set of test files using the naming scheme: + `test_filename`. One unittest will be run for each set of input and output files. + """ + pass diff --git a/Libs/markdown/treeprocessors.py b/Libs/markdown/treeprocessors.py new file mode 100644 index 0000000..a475fde --- /dev/null +++ b/Libs/markdown/treeprocessors.py @@ -0,0 +1,436 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import xml.etree.ElementTree as etree +from . import util +from . import inlinepatterns + + +def build_treeprocessors(md, **kwargs): + """ Build the default treeprocessors for Markdown. """ + treeprocessors = util.Registry() + treeprocessors.register(InlineProcessor(md), 'inline', 20) + treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10) + return treeprocessors + + +def isString(s): + """ Check if it's string """ + if not isinstance(s, util.AtomicString): + return isinstance(s, str) + return False + + +class Treeprocessor(util.Processor): + """ + Treeprocessors are run on the ElementTree object before serialization. + + Each Treeprocessor implements a "run" method that takes a pointer to an + ElementTree, modifies it as necessary and returns an ElementTree + object. + + Treeprocessors must extend markdown.Treeprocessor. + + """ + def run(self, root): + """ + Subclasses of Treeprocessor should implement a `run` method, which + takes a root ElementTree. This method can return another ElementTree + object, and the existing root ElementTree will be replaced, or it can + modify the current tree and return None. + """ + pass # pragma: no cover + + +class InlineProcessor(Treeprocessor): + """ + A Treeprocessor that traverses a tree, applying inline patterns. + """ + + def __init__(self, md): + self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX + self.__placeholder_suffix = util.ETX + self.__placeholder_length = 4 + len(self.__placeholder_prefix) \ + + len(self.__placeholder_suffix) + self.__placeholder_re = util.INLINE_PLACEHOLDER_RE + self.md = md + self.inlinePatterns = md.inlinePatterns + self.ancestors = [] + + @property + @util.deprecated("Use 'md' instead.") + def markdown(self): + # TODO: remove this later + return self.md + + def __makePlaceholder(self, type): + """ Generate a placeholder """ + id = "%04d" % len(self.stashed_nodes) + hash = util.INLINE_PLACEHOLDER % id + return hash, id + + def __findPlaceholder(self, data, index): + """ + Extract id from data string, start from index + + Keyword arguments: + + * data: string + * index: index, from which we start search + + Returns: placeholder id and string index, after the found placeholder. + + """ + m = self.__placeholder_re.search(data, index) + if m: + return m.group(1), m.end() + else: + return None, index + 1 + + def __stashNode(self, node, type): + """ Add node to stash """ + placeholder, id = self.__makePlaceholder(type) + self.stashed_nodes[id] = node + return placeholder + + def __handleInline(self, data, patternIndex=0): + """ + Process string with inline patterns and replace it + with placeholders + + Keyword arguments: + + * data: A line of Markdown text + * patternIndex: The index of the inlinePattern to start with + + Returns: String with placeholders. + + """ + if not isinstance(data, util.AtomicString): + startIndex = 0 + count = len(self.inlinePatterns) + while patternIndex < count: + data, matched, startIndex = self.__applyPattern( + self.inlinePatterns[patternIndex], data, patternIndex, startIndex + ) + if not matched: + patternIndex += 1 + return data + + def __processElementText(self, node, subnode, isText=True): + """ + Process placeholders in Element.text or Element.tail + of Elements popped from self.stashed_nodes. + + Keywords arguments: + + * node: parent node + * subnode: processing node + * isText: bool variable, True - it's text, False - it's tail + + Returns: None + + """ + if isText: + text = subnode.text + subnode.text = None + else: + text = subnode.tail + subnode.tail = None + + childResult = self.__processPlaceholders(text, subnode, isText) + + if not isText and node is not subnode: + pos = list(node).index(subnode) + 1 + else: + pos = 0 + + childResult.reverse() + for newChild in childResult: + node.insert(pos, newChild[0]) + + def __processPlaceholders(self, data, parent, isText=True): + """ + Process string with placeholders and generate ElementTree tree. + + Keyword arguments: + + * data: string with placeholders instead of ElementTree elements. + * parent: Element, which contains processing inline data + + Returns: list with ElementTree elements with applied inline patterns. + + """ + def linkText(text): + if text: + if result: + if result[-1][0].tail: + result[-1][0].tail += text + else: + result[-1][0].tail = text + elif not isText: + if parent.tail: + parent.tail += text + else: + parent.tail = text + else: + if parent.text: + parent.text += text + else: + parent.text = text + result = [] + strartIndex = 0 + while data: + index = data.find(self.__placeholder_prefix, strartIndex) + if index != -1: + id, phEndIndex = self.__findPlaceholder(data, index) + + if id in self.stashed_nodes: + node = self.stashed_nodes.get(id) + + if index > 0: + text = data[strartIndex:index] + linkText(text) + + if not isString(node): # it's Element + for child in [node] + list(node): + if child.tail: + if child.tail.strip(): + self.__processElementText( + node, child, False + ) + if child.text: + if child.text.strip(): + self.__processElementText(child, child) + else: # it's just a string + linkText(node) + strartIndex = phEndIndex + continue + + strartIndex = phEndIndex + result.append((node, self.ancestors[:])) + + else: # wrong placeholder + end = index + len(self.__placeholder_prefix) + linkText(data[strartIndex:end]) + strartIndex = end + else: + text = data[strartIndex:] + if isinstance(data, util.AtomicString): + # We don't want to loose the AtomicString + text = util.AtomicString(text) + linkText(text) + data = "" + + return result + + def __applyPattern(self, pattern, data, patternIndex, startIndex=0): + """ + Check if the line fits the pattern, create the necessary + elements, add it to stashed_nodes. + + Keyword arguments: + + * data: the text to be processed + * pattern: the pattern to be checked + * patternIndex: index of current pattern + * startIndex: string index, from which we start searching + + Returns: String with placeholders instead of ElementTree elements. + + """ + new_style = isinstance(pattern, inlinepatterns.InlineProcessor) + + for exclude in pattern.ANCESTOR_EXCLUDES: + if exclude.lower() in self.ancestors: + return data, False, 0 + + if new_style: + match = None + # Since handleMatch may reject our first match, + # we iterate over the buffer looking for matches + # until we can't find any more. + for match in pattern.getCompiledRegExp().finditer(data, startIndex): + node, start, end = pattern.handleMatch(match, data) + if start is None or end is None: + startIndex += match.end(0) + match = None + continue + break + else: # pragma: no cover + match = pattern.getCompiledRegExp().match(data[startIndex:]) + leftData = data[:startIndex] + + if not match: + return data, False, 0 + + if not new_style: # pragma: no cover + node = pattern.handleMatch(match) + start = match.start(0) + end = match.end(0) + + if node is None: + return data, True, end + + if not isString(node): + if not isinstance(node.text, util.AtomicString): + # We need to process current node too + for child in [node] + list(node): + if not isString(node): + if child.text: + self.ancestors.append(child.tag.lower()) + child.text = self.__handleInline( + child.text, patternIndex + 1 + ) + self.ancestors.pop() + if child.tail: + child.tail = self.__handleInline( + child.tail, patternIndex + ) + + placeholder = self.__stashNode(node, pattern.type()) + + if new_style: + return "{}{}{}".format(data[:start], + placeholder, data[end:]), True, 0 + else: # pragma: no cover + return "{}{}{}{}".format(leftData, + match.group(1), + placeholder, match.groups()[-1]), True, 0 + + def __build_ancestors(self, parent, parents): + """Build the ancestor list.""" + ancestors = [] + while parent is not None: + if parent is not None: + ancestors.append(parent.tag.lower()) + parent = self.parent_map.get(parent) + ancestors.reverse() + parents.extend(ancestors) + + def run(self, tree, ancestors=None): + """Apply inline patterns to a parsed Markdown tree. + + Iterate over ElementTree, find elements with inline tag, apply inline + patterns and append newly created Elements to tree. If you don't + want to process your data with inline patterns, instead of normal + string, use subclass AtomicString: + + node.text = markdown.AtomicString("This will not be processed.") + + Arguments: + + * tree: ElementTree object, representing Markdown tree. + * ancestors: List of parent tag names that precede the tree node (if needed). + + Returns: ElementTree object with applied inline patterns. + + """ + self.stashed_nodes = {} + + # Ensure a valid parent list, but copy passed in lists + # to ensure we don't have the user accidentally change it on us. + tree_parents = [] if ancestors is None else ancestors[:] + + self.parent_map = {c: p for p in tree.iter() for c in p} + stack = [(tree, tree_parents)] + + while stack: + currElement, parents = stack.pop() + + self.ancestors = parents + self.__build_ancestors(currElement, self.ancestors) + + insertQueue = [] + for child in currElement: + if child.text and not isinstance( + child.text, util.AtomicString + ): + self.ancestors.append(child.tag.lower()) + text = child.text + child.text = None + lst = self.__processPlaceholders( + self.__handleInline(text), child + ) + for item in lst: + self.parent_map[item[0]] = child + stack += lst + insertQueue.append((child, lst)) + self.ancestors.pop() + if child.tail: + tail = self.__handleInline(child.tail) + dumby = etree.Element('d') + child.tail = None + tailResult = self.__processPlaceholders(tail, dumby, False) + if dumby.tail: + child.tail = dumby.tail + pos = list(currElement).index(child) + 1 + tailResult.reverse() + for newChild in tailResult: + self.parent_map[newChild[0]] = currElement + currElement.insert(pos, newChild[0]) + if len(child): + self.parent_map[child] = currElement + stack.append((child, self.ancestors[:])) + + for element, lst in insertQueue: + for i, obj in enumerate(lst): + newChild = obj[0] + element.insert(i, newChild) + return tree + + +class PrettifyTreeprocessor(Treeprocessor): + """ Add linebreaks to the html document. """ + + def _prettifyETree(self, elem): + """ Recursively add linebreaks to ElementTree children. """ + + i = "\n" + if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']: + if (not elem.text or not elem.text.strip()) \ + and len(elem) and self.md.is_block_level(elem[0].tag): + elem.text = i + for e in elem: + if self.md.is_block_level(e.tag): + self._prettifyETree(e) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + if not elem.tail or not elem.tail.strip(): + elem.tail = i + + def run(self, root): + """ Add linebreaks to ElementTree root object. """ + + self._prettifyETree(root) + # Do <br />'s separately as they are often in the middle of + # inline content and missed by _prettifyETree. + brs = root.iter('br') + for br in brs: + if not br.tail or not br.tail.strip(): + br.tail = '\n' + else: + br.tail = '\n%s' % br.tail + # Clean up extra empty lines at end of code blocks. + pres = root.iter('pre') + for pre in pres: + if len(pre) and pre[0].tag == 'code': + pre[0].text = util.AtomicString(pre[0].text.rstrip() + '\n') diff --git a/Libs/markdown/util.py b/Libs/markdown/util.py new file mode 100644 index 0000000..46cfe68 --- /dev/null +++ b/Libs/markdown/util.py @@ -0,0 +1,485 @@ +""" +Python Markdown + +A Python implementation of John Gruber's Markdown. + +Documentation: https://python-markdown.github.io/ +GitHub: https://github.com/Python-Markdown/markdown/ +PyPI: https://pypi.org/project/Markdown/ + +Started by Manfred Stienstra (http://www.dwerg.net/). +Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). +Currently maintained by Waylan Limberg (https://github.com/waylan), +Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). + +Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later) +Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) +Copyright 2004 Manfred Stienstra (the original version) + +License: BSD (see LICENSE.md for details). +""" + +import re +import sys +import warnings +import xml.etree.ElementTree +from collections import namedtuple +from functools import wraps +from itertools import count + +from .pep562 import Pep562 + +if sys.version_info >= (3, 10): + from importlib import metadata +else: + # <PY310 use backport + import importlib_metadata as metadata + +PY37 = (3, 7) <= sys.version_info + + +# TODO: Remove deprecated variables in a future release. +__deprecated__ = { + 'etree': ('xml.etree.ElementTree', xml.etree.ElementTree), + 'string_type': ('str', str), + 'text_type': ('str', str), + 'int2str': ('chr', chr), + 'iterrange': ('range', range) +} + + +""" +Constants you might want to modify +----------------------------------------------------------------------------- +""" + + +BLOCK_LEVEL_ELEMENTS = [ + # Elements which are invalid to wrap in a `<p>` tag. + # See https://w3c.github.io/html/grouping-content.html#the-p-element + 'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl', + 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', + 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol', + 'p', 'pre', 'section', 'table', 'ul', + # Other elements which Markdown should not be mucking up the contents of. + 'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'iframe', 'li', 'legend', + 'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script', + 'style', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video' +] + +# Placeholders +STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder +ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder +INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:" +INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX +INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)') +AMP_SUBSTITUTE = STX+"amp"+ETX +HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX +HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)') +TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX + + +""" +Constants you probably do not need to change +----------------------------------------------------------------------------- +""" + +# Only load extension entry_points once. +INSTALLED_EXTENSIONS = metadata.entry_points(group='markdown.extensions') +RTL_BIDI_RANGES = ( + ('\u0590', '\u07FF'), + # Hebrew (0590-05FF), Arabic (0600-06FF), + # Syriac (0700-074F), Arabic supplement (0750-077F), + # Thaana (0780-07BF), Nko (07C0-07FF). + ('\u2D30', '\u2D7F') # Tifinagh +) + + +""" +AUXILIARY GLOBAL FUNCTIONS +============================================================================= +""" + + +def deprecated(message, stacklevel=2): + """ + Raise a DeprecationWarning when wrapped function/method is called. + + Usage: + @deprecated("This method will be removed in version X; use Y instead.") + def some_method()" + pass + """ + def wrapper(func): + @wraps(func) + def deprecated_func(*args, **kwargs): + warnings.warn( + f"'{func.__name__}' is deprecated. {message}", + category=DeprecationWarning, + stacklevel=stacklevel + ) + return func(*args, **kwargs) + return deprecated_func + return wrapper + + +@deprecated("Use 'Markdown.is_block_level' instead.") +def isBlockLevel(tag): + """Check if the tag is a block level HTML tag.""" + if isinstance(tag, str): + return tag.lower().rstrip('/') in BLOCK_LEVEL_ELEMENTS + # Some ElementTree tags are not strings, so return False. + return False + + +def parseBoolValue(value, fail_on_errors=True, preserve_none=False): + """Parses a string representing bool value. If parsing was successful, + returns True or False. If preserve_none=True, returns True, False, + or None. If parsing was not successful, raises ValueError, or, if + fail_on_errors=False, returns None.""" + if not isinstance(value, str): + if preserve_none and value is None: + return value + return bool(value) + elif preserve_none and value.lower() == 'none': + return None + elif value.lower() in ('true', 'yes', 'y', 'on', '1'): + return True + elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'): + return False + elif fail_on_errors: + raise ValueError('Cannot parse bool value: %r' % value) + + +def code_escape(text): + """Escape code.""" + if "&" in text: + text = text.replace("&", "&") + if "<" in text: + text = text.replace("<", "<") + if ">" in text: + text = text.replace(">", ">") + return text + + +def _get_stack_depth(size=2): + """Get current stack depth, performantly. + """ + frame = sys._getframe(size) + + for size in count(size): + frame = frame.f_back + if not frame: + return size + + +def nearing_recursion_limit(): + """Return true if current stack depth is within 100 of maximum limit.""" + return sys.getrecursionlimit() - _get_stack_depth() < 100 + + +""" +MISC AUXILIARY CLASSES +============================================================================= +""" + + +class AtomicString(str): + """A string which should not be further processed.""" + pass + + +class Processor: + def __init__(self, md=None): + self.md = md + + @property + @deprecated("Use 'md' instead.") + def markdown(self): + # TODO: remove this later + return self.md + + +class HtmlStash: + """ + This class is used for stashing HTML objects that we extract + in the beginning and replace with place-holders. + """ + + def __init__(self): + """ Create a HtmlStash. """ + self.html_counter = 0 # for counting inline html segments + self.rawHtmlBlocks = [] + self.tag_counter = 0 + self.tag_data = [] # list of dictionaries in the order tags appear + + def store(self, html): + """ + Saves an HTML segment for later reinsertion. Returns a + placeholder string that needs to be inserted into the + document. + + Keyword arguments: + + * html: an html segment + + Returns : a placeholder string + + """ + self.rawHtmlBlocks.append(html) + placeholder = self.get_placeholder(self.html_counter) + self.html_counter += 1 + return placeholder + + def reset(self): + self.html_counter = 0 + self.rawHtmlBlocks = [] + + def get_placeholder(self, key): + return HTML_PLACEHOLDER % key + + def store_tag(self, tag, attrs, left_index, right_index): + """Store tag data and return a placeholder.""" + self.tag_data.append({'tag': tag, 'attrs': attrs, + 'left_index': left_index, + 'right_index': right_index}) + placeholder = TAG_PLACEHOLDER % str(self.tag_counter) + self.tag_counter += 1 # equal to the tag's index in self.tag_data + return placeholder + + +# Used internally by `Registry` for each item in its sorted list. +# Provides an easier to read API when editing the code later. +# For example, `item.name` is more clear than `item[0]`. +_PriorityItem = namedtuple('PriorityItem', ['name', 'priority']) + + +class Registry: + """ + A priority sorted registry. + + A `Registry` instance provides two public methods to alter the data of the + registry: `register` and `deregister`. Use `register` to add items and + `deregister` to remove items. See each method for specifics. + + When registering an item, a "name" and a "priority" must be provided. All + items are automatically sorted by "priority" from highest to lowest. The + "name" is used to remove ("deregister") and get items. + + A `Registry` instance it like a list (which maintains order) when reading + data. You may iterate over the items, get an item and get a count (length) + of all items. You may also check that the registry contains an item. + + When getting an item you may use either the index of the item or the + string-based "name". For example: + + registry = Registry() + registry.register(SomeItem(), 'itemname', 20) + # Get the item by index + item = registry[0] + # Get the item by name + item = registry['itemname'] + + When checking that the registry contains an item, you may use either the + string-based "name", or a reference to the actual item. For example: + + someitem = SomeItem() + registry.register(someitem, 'itemname', 20) + # Contains the name + assert 'itemname' in registry + # Contains the item instance + assert someitem in registry + + The method `get_index_for_name` is also available to obtain the index of + an item using that item's assigned "name". + """ + + def __init__(self): + self._data = {} + self._priority = [] + self._is_sorted = False + + def __contains__(self, item): + if isinstance(item, str): + # Check if an item exists by this name. + return item in self._data.keys() + # Check if this instance exists. + return item in self._data.values() + + def __iter__(self): + self._sort() + return iter([self._data[k] for k, p in self._priority]) + + def __getitem__(self, key): + self._sort() + if isinstance(key, slice): + data = Registry() + for k, p in self._priority[key]: + data.register(self._data[k], k, p) + return data + if isinstance(key, int): + return self._data[self._priority[key].name] + return self._data[key] + + def __len__(self): + return len(self._priority) + + def __repr__(self): + return '<{}({})>'.format(self.__class__.__name__, list(self)) + + def get_index_for_name(self, name): + """ + Return the index of the given name. + """ + if name in self: + self._sort() + return self._priority.index( + [x for x in self._priority if x.name == name][0] + ) + raise ValueError('No item named "{}" exists.'.format(name)) + + def register(self, item, name, priority): + """ + Add an item to the registry with the given name and priority. + + Parameters: + + * `item`: The item being registered. + * `name`: A string used to reference the item. + * `priority`: An integer or float used to sort against all items. + + If an item is registered with a "name" which already exists, the + existing item is replaced with the new item. Treat carefully as the + old item is lost with no way to recover it. The new item will be + sorted according to its priority and will **not** retain the position + of the old item. + """ + if name in self: + # Remove existing item of same name first + self.deregister(name) + self._is_sorted = False + self._data[name] = item + self._priority.append(_PriorityItem(name, priority)) + + def deregister(self, name, strict=True): + """ + Remove an item from the registry. + + Set `strict=False` to fail silently. + """ + try: + index = self.get_index_for_name(name) + del self._priority[index] + del self._data[name] + except ValueError: + if strict: + raise + + def _sort(self): + """ + Sort the registry by priority from highest to lowest. + + This method is called internally and should never be explicitly called. + """ + if not self._is_sorted: + self._priority.sort(key=lambda item: item.priority, reverse=True) + self._is_sorted = True + + # Deprecated Methods which provide a smooth transition from OrderedDict + + def __setitem__(self, key, value): + """ Register item with priority 5 less than lowest existing priority. """ + if isinstance(key, str): + warnings.warn( + 'Using setitem to register a processor or pattern is deprecated. ' + 'Use the `register` method instead.', + DeprecationWarning, + stacklevel=2, + ) + if key in self: + # Key already exists, replace without altering priority + self._data[key] = value + return + if len(self) == 0: + # This is the first item. Set priority to 50. + priority = 50 + else: + self._sort() + priority = self._priority[-1].priority - 5 + self.register(value, key, priority) + else: + raise TypeError + + def __delitem__(self, key): + """ Deregister an item by name. """ + if key in self: + self.deregister(key) + warnings.warn( + 'Using del to remove a processor or pattern is deprecated. ' + 'Use the `deregister` method instead.', + DeprecationWarning, + stacklevel=2, + ) + else: + raise KeyError('Cannot delete key {}, not registered.'.format(key)) + + def add(self, key, value, location): + """ Register a key by location. """ + if len(self) == 0: + # This is the first item. Set priority to 50. + priority = 50 + elif location == '_begin': + self._sort() + # Set priority 5 greater than highest existing priority + priority = self._priority[0].priority + 5 + elif location == '_end': + self._sort() + # Set priority 5 less than lowest existing priority + priority = self._priority[-1].priority - 5 + elif location.startswith('<') or location.startswith('>'): + # Set priority halfway between existing priorities. + i = self.get_index_for_name(location[1:]) + if location.startswith('<'): + after = self._priority[i].priority + if i > 0: + before = self._priority[i-1].priority + else: + # Location is first item` + before = after + 10 + else: + # location.startswith('>') + before = self._priority[i].priority + if i < len(self) - 1: + after = self._priority[i+1].priority + else: + # location is last item + after = before - 10 + priority = before - ((before - after) / 2) + else: + raise ValueError('Not a valid location: "%s". Location key ' + 'must start with a ">" or "<".' % location) + self.register(value, key, priority) + warnings.warn( + 'Using the add method to register a processor or pattern is deprecated. ' + 'Use the `register` method instead.', + DeprecationWarning, + stacklevel=2, + ) + + +def __getattr__(name): + """Get attribute.""" + + deprecated = __deprecated__.get(name) + if deprecated: + warnings.warn( + "'{}' is deprecated. Use '{}' instead.".format(name, deprecated[0]), + category=DeprecationWarning, + stacklevel=(3 if PY37 else 4) + ) + return deprecated[1] + raise AttributeError("module '{}' has no attribute '{}'".format(__name__, name)) + + +if not PY37: + Pep562(__name__) diff --git a/Pages/index.md b/Pages/index.md new file mode 100755 index 0000000..a73c5eb --- /dev/null +++ b/Pages/index.md @@ -0,0 +1,11 @@ +% - Commento +% Template: Standard.html +% Background: #eeddff + +# Prova +Si testa il semi-generatore di siti statici + +% - Altro commento + +### Sottocoso +Il generatore dovrebbe saper gestire i sottocosi diff --git a/Pages/test/test.md b/Pages/test/test.md new file mode 100644 index 0000000..19b1e62 --- /dev/null +++ b/Pages/test/test.md @@ -0,0 +1 @@ +test. diff --git a/Parts/Standard/BottomBox.html b/Parts/Standard/BottomBox.html new file mode 100644 index 0000000..e69de29 diff --git a/Parts/Standard/LeftBox.html b/Parts/Standard/LeftBox.html new file mode 100644 index 0000000..e69de29 diff --git a/Parts/Standard/TopBox.html b/Parts/Standard/TopBox.html new file mode 100644 index 0000000..e69de29 diff --git a/Templates/Standard.html b/Templates/Standard.html new file mode 100755 index 0000000..efefb4d --- /dev/null +++ b/Templates/Standard.html @@ -0,0 +1,34 @@ +<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <link rel="stylesheet" href="Style.css"> + <script src="/App.js"></script> + <title>[HTML:Page:Title] - postoctt + + + +

+
+ [HTML:Part:Standard/TopBox.html] +
+
+
+ [HTML:Part:Standard/LeftBox.html] +
+
+ [HTML:Page:RightBox] +
+
+ [HTML:Page:MainBox] +
+
+
+ [HTML:Part:Standard/BottomBox.html] +
+
+ +