mirror of
https://gitlab.com/octtspacc/staticoso
synced 2025-06-05 22:09:23 +02:00
Transition to monorepo on a new Dev branch
This commit is contained in:
892
App/Source/Libs/markdown/inlinepatterns.py
Normal file
892
App/Source/Libs/markdown/inlinepatterns.py
Normal file
@ -0,0 +1,892 @@
|
||||
"""
|
||||
Python Markdown
|
||||
|
||||
A Python implementation of John Gruber's Markdown.
|
||||
|
||||
Documentation: https://python-markdown.github.io/
|
||||
GitHub: https://github.com/Python-Markdown/markdown/
|
||||
PyPI: https://pypi.org/project/Markdown/
|
||||
|
||||
Started by Manfred Stienstra (http://www.dwerg.net/).
|
||||
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
|
||||
Currently maintained by Waylan Limberg (https://github.com/waylan),
|
||||
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
|
||||
|
||||
Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
|
||||
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
|
||||
Copyright 2004 Manfred Stienstra (the original version)
|
||||
|
||||
License: BSD (see LICENSE.md for details).
|
||||
|
||||
INLINE PATTERNS
|
||||
=============================================================================
|
||||
|
||||
Inline patterns such as *emphasis* are handled by means of auxiliary
|
||||
objects, one per pattern. Pattern objects must be instances of classes
|
||||
that extend markdown.Pattern. Each pattern object uses a single regular
|
||||
expression and needs to support the following methods:
|
||||
|
||||
pattern.getCompiledRegExp() # returns a regular expression
|
||||
|
||||
pattern.handleMatch(m) # takes a match object and returns
|
||||
# an ElementTree element or just plain text
|
||||
|
||||
All of python markdown's built-in patterns subclass from Pattern,
|
||||
but you can add additional patterns that don't.
|
||||
|
||||
Also note that all the regular expressions used by inline must
|
||||
capture the whole block. For this reason, they all start with
|
||||
'^(.*)' and end with '(.*)!'. In the case of built-in expressions,
|
||||
Pattern takes care of adding the "^(.*)" and "(.*)!".
|
||||
|
||||
Finally, the order in which regular expressions are applied is very
|
||||
important - e.g. if we first replace http://.../ links with <a> tags
|
||||
and _then_ try to replace inline html, we would end up with a mess.
|
||||
So, we apply the expressions in the following order:
|
||||
|
||||
* escape and backticks have to go before everything else, so
|
||||
that we can preempt any markdown patterns by escaping them.
|
||||
|
||||
* then we handle auto-links (must be done before inline html)
|
||||
|
||||
* then we handle inline HTML. At this point we will simply
|
||||
replace all inline HTML strings with a placeholder and add
|
||||
the actual HTML to a hash.
|
||||
|
||||
* then inline images (must be done before links)
|
||||
|
||||
* then bracketed links, first regular then reference-style
|
||||
|
||||
* finally we apply strong and emphasis
|
||||
"""
|
||||
|
||||
from . import util
|
||||
from collections import namedtuple
|
||||
import re
|
||||
import xml.etree.ElementTree as etree
|
||||
try: # pragma: no cover
|
||||
from html import entities
|
||||
except ImportError: # pragma: no cover
|
||||
import htmlentitydefs as entities
|
||||
|
||||
|
||||
def build_inlinepatterns(md, **kwargs):
    """Create the registry holding Markdown's default inline processors.

    Keyword arguments:

    * md: The Markdown instance handed to every processor that needs one.
    * kwargs: Accepted for call compatibility; not used by this function.

    Returns a `util.Registry` in which each default processor has been
    registered under the name and priority number listed below.

    """
    registry = util.Registry()
    # (processor, registered name, priority) - registered in this exact order.
    defaults = (
        (BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190),
        (EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180),
        (ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170),
        (LinkInlineProcessor(LINK_RE, md), 'link', 160),
        (ImageInlineProcessor(IMAGE_LINK_RE, md), 'image_link', 150),
        (ImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'image_reference', 140),
        (ShortReferenceInlineProcessor(REFERENCE_RE, md), 'short_reference', 130),
        (ShortImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'short_image_ref', 125),
        (AutolinkInlineProcessor(AUTOLINK_RE, md), 'autolink', 120),
        (AutomailInlineProcessor(AUTOMAIL_RE, md), 'automail', 110),
        (SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br'), 'linebreak', 100),
        (HtmlInlineProcessor(HTML_RE, md), 'html', 90),
        (HtmlInlineProcessor(ENTITY_RE, md), 'entity', 80),
        (SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 70),
        (AsteriskProcessor(r'\*'), 'em_strong', 60),
        (UnderscoreProcessor(r'_'), 'em_strong2', 50),
    )
    for processor, name, priority in defaults:
        registry.register(processor, name, priority)
    return registry
|
||||
|
||||
|
||||
"""
|
||||
The actual regular expressions for patterns
|
||||
-----------------------------------------------------------------------------
|
||||
"""
|
||||
|
||||
# Negative lookbehind: the following token must not be preceded by `!`,
# which is what distinguishes a link `[` from an image `![`.
NOIMG = r'(?<!\!)'

# `e=f()` or ``e=f("`")``
# Group 1: a run of escaped backslashes directly before backticks.
# Groups 2/3: the opening backtick run and the code text it encloses.
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'

# \<
ESCAPE_RE = r'\\(.)'

# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\1'

# **strong**
STRONG_RE = r'(\*{2})(.+?)\1'

# __smart__strong__
SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'

# _smart_emphasis_
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'

# __strong _em__
SMART_STRONG_EM_RE = r'(?<!\w)(\_)\1(?!\1)(.+?)(?<!\w)\1(?!\1)(.+?)\1{3}(?!\w)'

# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*)\1{2}(.+?)\1(.*?)\1{2}'

# ___strongem___ or ___em_strong__
EM_STRONG2_RE = r'(_)\1{2}(.+?)\1(.*?)\1{2}'

# ***strong**em*
STRONG_EM_RE = r'(\*)\1{2}(.+?)\1{2}(.*?)\1'

# ___strong__em_
STRONG_EM2_RE = r'(_)\1{2}(.+?)\1{2}(.*?)\1'

# **strong*em***
STRONG_EM3_RE = r'(\*)\1(?!\1)([^*]+?)\1(?!\1)(.+?)\1{3}'

# [text](url) or [text](<url>) or [text](url "title")
# Only the opening `[` is matched here; the processor parses the rest.
LINK_RE = NOIMG + r'\['

# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
# Only the opening `![` is matched here; the processor parses the rest.
IMAGE_LINK_RE = r'\!\['

# [Google][3]
REFERENCE_RE = LINK_RE

# ![alt text][2]
IMAGE_REFERENCE_RE = IMAGE_LINK_RE

# stand-alone * or _
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'

# <http://www.123.com>
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'

# <me@example.com>
AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>'

# <...>
# Matches an opening/closing tag or an HTML comment.
HTML_RE = r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|!--(?:(?!<!--|-->).)*--)>)'

# "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named)
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'
|
||||
|
||||
# Two spaces at end of line produce a hard line break.
# The pattern needs BOTH spaces: with a single space, any line that merely
# ends in one trailing blank would be turned into a `<br>`.
LINE_BREAK_RE = r'  \n'
|
||||
|
||||
|
||||
def dequote(string):
    """Remove one pair of matching quotes from around a string.

    Keyword arguments:

    * string: The text to inspect.

    Returns the text without its first and last character when both are the
    same quote character (`"` or `'`); otherwise returns the text unchanged.
    A string consisting of a single quote character is returned unchanged,
    because one character cannot both open and close a quoted string.

    """
    quoted = (
        len(string) >= 2 and
        string[0] == string[-1] and
        string[0] in ('"', "'")
    )
    if quoted:
        return string[1:-1]
    return string
|
||||
|
||||
|
||||
class EmStrongItem(namedtuple('EmStrongItem', ['pattern', 'builder', 'tags'])):
    """Emphasis/strong pattern item.

    Fields, as used by the emphasis processors in this module:

    * pattern: A compiled regular expression for one emphasis/strong form.
    * builder: Name of the builder to use ('single', 'double' or 'double2').
    * tags: Tag name, or two comma separated tag names (e.g. 'strong,em').

    """
|
||||
|
||||
|
||||
"""
|
||||
The pattern classes
|
||||
-----------------------------------------------------------------------------
|
||||
"""
|
||||
|
||||
|
||||
class Pattern:  # pragma: no cover
    """Base class for the legacy (whole-block) inline patterns.

    The supplied expression is wrapped so that it captures the text before
    and after the actual match: group 1 is the leading text, the last group
    is the trailing text and the pattern's own groups sit in between.

    """

    ANCESTOR_EXCLUDES = ()

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: The Markdown instance this pattern belongs to (optional)

        """
        flags = re.DOTALL | re.UNICODE
        self.md = md
        self.pattern = pattern
        self.compiled_re = re.compile(r"^(.*?)%s(.*)$" % pattern, flags)

    @property
    @util.deprecated("Use 'md' instead.")
    def markdown(self):
        # TODO: remove this later
        # Deprecated alias kept so that old code reading `.markdown` works.
        return self.md

    def getCompiledRegExp(self):
        """ Return the compiled, wrapped regular expression. """
        return self.compiled_re

    def handleMatch(self, m):
        """Return an ElementTree element built from the given match.

        Subclasses should override this method; the base implementation
        does nothing and returns None.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.

        """
        pass  # pragma: no cover

    def type(self):
        """ Return the class name, which identifies the pattern type. """
        return self.__class__.__name__

    def unescape(self, text):
        """ Return text with inline placeholders replaced by stashed content. """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def restore(found):
            key = found.group(1)
            if key not in stash:
                # Unknown placeholder: same result as falling off the end.
                return None
            node = stash.get(key)
            if not isinstance(node, str):
                # An etree Element - return text content only
                return ''.join(node.itertext())
            return node

        return util.INLINE_PLACEHOLDER_RE.sub(restore, text)
|
||||
|
||||
|
||||
class InlineProcessor(Pattern):
    """
    Base class for the newer style inline processors.

    Unlike `Pattern`, the expression is compiled exactly as given (it is
    not wrapped to capture the surrounding text), and `handleMatch`
    reports which region of the buffer it consumed.

    """

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline processor.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: The Markdown instance this processor belongs to (optional)

        """
        flags = re.DOTALL | re.UNICODE
        self.pattern = pattern
        self.compiled_re = re.compile(pattern, flags)

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False
        self.md = md

    def handleMatch(self, m, data):
        """Return an ElementTree element for the match plus the start and
        end index of the text that was consumed.

        When `start` and/or `end` come back as `None`, the processor is
        taken to have found no valid region of text.

        Subclasses should override this method; the base implementation
        does nothing and returns None.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.
        * data: The buffer currently under analysis

        Returns:

        * el: The ElementTree element, text or None.
        * start: The start of the region that has been matched or None.
        * end: The end of the region that has been matched or None.

        """
        pass  # pragma: no cover
|
||||
|
||||
|
||||
class SimpleTextPattern(Pattern):  # pragma: no cover
    """ Legacy pattern that yields the plain text captured in group(2). """

    def handleMatch(self, m):
        captured = m.group(2)
        return captured
|
||||
|
||||
|
||||
class SimpleTextInlineProcessor(InlineProcessor):
    """ Yield the plain text captured in group(1), consuming the whole match. """

    def handleMatch(self, m, data):
        start, end = m.span(0)
        return m.group(1), start, end
|
||||
|
||||
|
||||
class EscapeInlineProcessor(InlineProcessor):
    """ Replace a backslash-escaped character with a code point placeholder. """

    def handleMatch(self, m, data):
        start, end = m.span(0)
        escaped = m.group(1)
        if escaped not in self.md.ESCAPED_CHARS:
            # Not an escapable character: report the region with no result.
            return None, start, end
        # Encode the character as STX + code point + ETX.
        return '{}{}{}'.format(util.STX, ord(escaped), util.ETX), start, end
|
||||
|
||||
|
||||
class SimpleTagPattern(Pattern):  # pragma: no cover
    """
    Legacy pattern producing a `tag` element whose text is group(3)
    of the match.

    """

    def __init__(self, pattern, tag):
        """
        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * tag: Name of the element to create for each match

        """
        Pattern.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m):
        element = etree.Element(self.tag)
        element.text = m.group(3)
        return element
|
||||
|
||||
|
||||
class SimpleTagInlineProcessor(InlineProcessor):
    """
    Produce a `tag` element whose text is group(2) of the match.

    """

    def __init__(self, pattern, tag):
        """
        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * tag: Name of the element to create for each match

        """
        InlineProcessor.__init__(self, pattern)
        self.tag = tag

    def handleMatch(self, m, data):  # pragma: no cover
        start, end = m.span(0)
        element = etree.Element(self.tag)
        element.text = m.group(2)
        return element, start, end
|
||||
|
||||
|
||||
class SubstituteTagPattern(SimpleTagPattern):  # pragma: no cover
    """ Legacy pattern replacing the match with an empty `tag` element. """

    def handleMatch(self, m):
        replacement = etree.Element(self.tag)
        return replacement
|
||||
|
||||
|
||||
class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
    """ Replace the whole match with an empty `tag` element. """

    def handleMatch(self, m, data):
        start, end = m.span(0)
        return etree.Element(self.tag), start, end
|
||||
|
||||
|
||||
class BacktickInlineProcessor(InlineProcessor):
    """ Turn backtick-delimited text into a `<code>` element. """

    def __init__(self, pattern):
        """
        Keyword arguments:

        * pattern: A regular expression that matches a pattern

        """
        InlineProcessor.__init__(self, pattern)
        self.tag = 'code'
        # Placeholder substituted for each escaped backslash pair.
        self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX)

    def handleMatch(self, m, data):
        start, end = m.span(0)
        code = m.group(3)
        if not code:
            # Only escaped backslashes matched (group 1): swap each pair for
            # the placeholder instead of creating an element.
            return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), start, end

        element = etree.Element(self.tag)
        element.text = util.AtomicString(util.code_escape(code.strip()))
        return element, start, end
|
||||
|
||||
|
||||
class DoubleTagPattern(SimpleTagPattern):  # pragma: no cover
    """Legacy pattern producing two nested elements (tag2 inside tag1).

    `self.tag` holds both tag names separated by a comma.
    Useful for strong emphasis etc.

    """

    def handleMatch(self, m):
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        # With five groups, group 4 is text that follows the inner element.
        if len(m.groups()) == 5:
            inner.tail = m.group(4)
        return outer
|
||||
|
||||
|
||||
class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
    """Produce two nested elements (tag2 inside tag1).

    `self.tag` holds both tag names separated by a comma.
    Useful for strong emphasis etc.

    """

    def handleMatch(self, m, data):  # pragma: no cover
        start, end = m.span(0)
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(2)
        # With three groups, group 3 is text that follows the inner element.
        if len(m.groups()) == 3:
            inner.tail = m.group(3)
        return outer, start, end
|
||||
|
||||
|
||||
class HtmlInlineProcessor(InlineProcessor):
    """ Stash raw inline html and hand back the placeholder for it. """

    def handleMatch(self, m, data):
        start, end = m.span(0)
        stored = self.md.htmlStash.store(self.unescape(m.group(1)))
        return stored, start, end

    def unescape(self, text):
        """ Return text with inline placeholders replaced by serialized nodes. """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def restore(found):
            node = stash.get(found.group(1))
            if node is None:
                # Nothing stashed under this id.
                return None
            try:
                return self.md.serializer(node)
            except Exception:
                # Serialization failed: fall back to the value itself,
                # prefixed with a backslash.
                return r'\%s' % node

        return util.INLINE_PLACEHOLDER_RE.sub(restore, text)
|
||||
|
||||
|
||||
class AsteriskProcessor(InlineProcessor):
    """Emphasis processor handling strong and em markup written with asterisks.

    `PATTERNS` is tried in order; when building the content of an element,
    only patterns listed after the one that produced it are considered.

    """

    PATTERNS = [
        EmStrongItem(
            re.compile(EM_STRONG_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'
        ),
        EmStrongItem(
            re.compile(STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'
        ),
        EmStrongItem(
            re.compile(STRONG_EM3_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'
        ),
        EmStrongItem(
            re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'
        ),
        EmStrongItem(
            re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em'
        ),
    ]

    @staticmethod
    def _attach_text(parent, last, text):
        """Store `text` as the tail of `last`, or as `parent`'s text if `last` is None."""
        if not text:
            return
        if last is not None:
            last.tail = text
        else:
            parent.text = text

    def build_single(self, m, tag, idx):
        """Return one `tag` element wrapping the parsed content of group 2."""
        element = etree.Element(tag)
        self.parse_sub_patterns(m.group(2), element, None, idx)
        return element

    def build_double(self, m, tags, idx):
        """Return nested elements: group 2 inside the inner tag, group 3 (if any) after it."""
        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), inner, None, idx)
        outer.append(inner)
        if len(m.groups()) == 3:
            # Remaining text belongs to the outer element, after the inner one.
            self.parse_sub_patterns(m.group(3), outer, inner, idx)
        return outer

    def build_double2(self, m, tags, idx):
        """Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""
        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        # Group 2 is the outer element's leading content ...
        self.parse_sub_patterns(m.group(2), outer, None, idx)
        # ... and group 3 goes inside the inner element appended after it.
        outer.append(inner)
        self.parse_sub_patterns(m.group(3), inner, None, idx)
        return outer

    def parse_sub_patterns(self, data, parent, last, idx):
        """
        Parse nested emphasis/strong patterns inside `data`.

        `data` (`str`):
            text to evaluate.

        `parent` (`etree.Element`):
            Parent to attach text and sub elements to.

        `last` (`etree.Element`):
            Last appended child to parent. Can also be None if parent has no children.

        `idx` (`int`):
            Current pattern index that was used to evaluate the parent.

        """
        consumed = 0  # everything before this index is already attached
        cursor = 0    # position currently being examined
        limit = len(data)

        while cursor < limit:
            advanced = False
            # Only bother with the patterns at a potential emphasis/strong token.
            if self.compiled_re.match(data, cursor):
                for index, item in enumerate(self.PATTERNS):
                    # Only evaluate patterns that are after what was used on the parent
                    if index <= idx:
                        continue
                    hit = item.pattern.match(data, cursor)
                    if not hit:
                        continue
                    # Pending plain text goes on the last child's tail, or on
                    # the parent's text when there is no child yet.
                    self._attach_text(parent, last, data[consumed:hit.start(0)])
                    child = self.build_element(hit, item.builder, item.tags, index)
                    parent.append(child)
                    last = child
                    # Move our position past the matched hunk; the remaining
                    # patterns are still tried from the new position.
                    consumed = cursor = hit.end(0)
                    advanced = True
            if not advanced:
                # Nothing consumed here, step to the next character.
                cursor += 1

        # Append any leftover text as a text node.
        self._attach_text(parent, last, data[consumed:])

    def build_element(self, m, builder, tags, index):
        """Dispatch to the builder method selected by `builder`."""
        if builder == 'double':
            build = self.build_double
        elif builder == 'double2':
            build = self.build_double2
        else:
            build = self.build_single
        return build(m, tags, index)

    def handleMatch(self, m, data):
        """Build the element for the first pattern matching at the start of `m`.

        Returns `(None, None, None)` when no pattern matches there.

        """
        origin = m.start(0)
        for index, item in enumerate(self.PATTERNS):
            hit = item.pattern.match(data, origin)
            if hit:
                start, end = hit.span(0)
                return self.build_element(hit, item.builder, item.tags, index), start, end
        return None, None, None
|
||||
|
||||
|
||||
class UnderscoreProcessor(AsteriskProcessor):
    """Emphasis processor handling strong and em markup written with underscores.

    Reuses the machinery of `AsteriskProcessor`; only the pattern table differs.

    """

    PATTERNS = [
        EmStrongItem(
            re.compile(EM_STRONG2_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'
        ),
        EmStrongItem(
            re.compile(STRONG_EM2_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'
        ),
        EmStrongItem(
            re.compile(SMART_STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'
        ),
        EmStrongItem(
            re.compile(SMART_STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'
        ),
        EmStrongItem(
            re.compile(SMART_EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em'
        ),
    ]
|
||||
|
||||
|
||||
class LinkInlineProcessor(InlineProcessor):
    """ Return a link element from the given match.

    The registered pattern only matches the opening `[`; `getText` then
    reads the bracketed text and `getLink` reads the parenthesized target.

    """
    # Matches `(` and, optionally, a complete `<url>` with an optional quoted
    # title and the closing `)`. Group 1 is the `<url>`, group 2 the title.
    RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
    # Every whitespace character in a title is replaced by a plain space.
    RE_TITLE_CLEAN = re.compile(r'\s')

    def handleMatch(self, m, data):
        """Build an `<a>` element for `[text](href "title")`.

        Returns `(el, start, end)`, or `(None, None, None)` when either the
        text or the link part cannot be parsed.

        """
        text, index, handled = self.getText(data, m.end(0))

        if not handled:
            return None, None, None

        href, title, index, handled = self.getLink(data, index)
        if not handled:
            return None, None, None

        el = etree.Element("a")
        el.text = text

        el.set("href", href)

        if title is not None:
            el.set("title", title)

        return el, m.start(0), index

    def getLink(self, data, index):
        """Parse data between `()` of `[Text]()` allowing recursive `()`.

        Keyword arguments:

        * data: The buffer under analysis.
        * index: Position in `data` where the `(` is expected.

        Returns `(href, title, index, handled)`: `title` is None when no
        title was found, `index` is the position after the parsed link and
        `handled` tells whether a complete link was found.

        """

        href = ''
        title = None
        handled = False

        m = self.RE_LINK.match(data, pos=index)
        if m and m.group(1):
            # Matches [Text](<link> "title")
            # Strip the surrounding `<>` from the link and the quotes from the title.
            href = m.group(1)[1:-1].strip()
            if m.group(2):
                title = m.group(2)[1:-1]
            index = m.end(0)
            handled = True
        elif m:
            # Only the opening `(` matched; scan character by character.
            # Track bracket nesting and index in string
            bracket_count = 1
            backtrack_count = 1
            start_index = m.end()
            index = start_index
            last_bracket = -1

            # Primary (first found) quote tracking.
            quote = None
            start_quote = -1
            exit_quote = -1
            ignore_matches = False

            # Secondary (second found) quote tracking.
            alt_quote = None
            start_alt_quote = -1
            exit_alt_quote = -1

            # Track last character
            last = ''

            # `index` is advanced in step with `pos`: it equals `pos` until the
            # `index += 1` below, so `index + 1` means "just after this character".
            for pos in range(index, len(data)):
                c = data[pos]
                if c == '(':
                    # Count nested (
                    # Don't increment the bracket count if we are sure we're in a title.
                    if not ignore_matches:
                        bracket_count += 1
                    elif backtrack_count > 0:
                        backtrack_count -= 1
                elif c == ')':
                    # Match nested ) to (
                    # Don't decrement if we are sure we are in a title that is unclosed.
                    if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)):
                        bracket_count = 0
                    elif not ignore_matches:
                        bracket_count -= 1
                    elif backtrack_count > 0:
                        backtrack_count -= 1
                        # We've found our backup end location if the title doesn't resolve.
                        if backtrack_count == 0:
                            last_bracket = index + 1

                elif c in ("'", '"'):
                    # Quote has started
                    if not quote:
                        # We'll assume we are now in a title.
                        # Brackets are quoted, so no need to match them (except for the final one).
                        ignore_matches = True
                        backtrack_count = bracket_count
                        bracket_count = 1
                        start_quote = index + 1
                        quote = c
                    # Secondary quote (in case the first doesn't resolve): [text](link'"title")
                    elif c != quote and not alt_quote:
                        start_alt_quote = index + 1
                        alt_quote = c
                    # Update primary quote match
                    elif c == quote:
                        exit_quote = index + 1
                    # Update secondary quote match
                    elif alt_quote and c == alt_quote:
                        exit_alt_quote = index + 1

                index += 1

                # Link is closed, so let's break out of the loop
                if bracket_count == 0:
                    # Get the title if we closed a title string right before link closed
                    if exit_quote >= 0 and quote == last:
                        href = data[start_index:start_quote - 1]
                        title = ''.join(data[start_quote:exit_quote - 1])
                    elif exit_alt_quote >= 0 and alt_quote == last:
                        href = data[start_index:start_alt_quote - 1]
                        title = ''.join(data[start_alt_quote:exit_alt_quote - 1])
                    else:
                        href = data[start_index:index - 1]
                    break

                # Remember the last character that is not a space, so a closing
                # quote followed by spaces still counts as directly before `)`.
                if c != ' ':
                    last = c

            # We have a scenario: [test](link"notitle)
            # When we enter a string, we stop tracking bracket resolution in the main counter,
            # but we do keep a backup counter up until we discover where we might resolve all brackets
            # if the title string fails to resolve.
            if bracket_count != 0 and backtrack_count == 0:
                href = data[start_index:last_bracket - 1]
                index = last_bracket
                bracket_count = 0

            handled = bracket_count == 0

        if title is not None:
            # Normalize the title: trim, unescape, drop enclosing quotes and
            # turn every whitespace character into a space.
            title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip())))

        href = self.unescape(href).strip()

        return href, title, index, handled

    def getText(self, data, index):
        """Parse the content between `[]` of the start of an image or link
        resolving nested square brackets.

        Keyword arguments:

        * data: The buffer under analysis.
        * index: Position in `data` just after the opening `[`.

        Returns `(text, index, handled)`: the bracketed text, the position
        after the scanned characters and whether the closing `]` was found.

        """
        bracket_count = 1
        text = []
        for pos in range(index, len(data)):
            c = data[pos]
            if c == ']':
                bracket_count -= 1
            elif c == '[':
                bracket_count += 1
            index += 1
            # The matching `]` itself is not part of the text.
            if bracket_count == 0:
                break
            text.append(c)
        return ''.join(text), index, bracket_count == 0
|
||||
|
||||
|
||||
class ImageInlineProcessor(LinkInlineProcessor):
    """ Build an `<img>` element from `![alt](src "title")`. """

    def handleMatch(self, m, data):
        """Return `(el, start, end)`, or `(None, None, None)` when either the
        alt text or the link part cannot be parsed.

        """
        failure = (None, None, None)

        alt, position, ok = self.getText(data, m.end(0))
        if not ok:
            return failure

        src, title, position, ok = self.getLink(data, position)
        if not ok:
            return failure

        image = etree.Element("img")
        image.set("src", src)
        if title is not None:
            image.set("title", title)
        # The alt text has its inline placeholders resolved.
        image.set('alt', self.unescape(alt))

        return image, m.start(0), position
|
||||
|
||||
|
||||
class ReferenceInlineProcessor(LinkInlineProcessor):
    """ Resolve `[text][id]` against the stored references and return a link. """

    # Collapses any run of whitespace in an id.
    NEWLINE_CLEANUP_RE = re.compile(r'\s+', re.MULTILINE)

    # The `[id]` part, optionally preceded by one whitespace character.
    RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)

    def handleMatch(self, m, data):
        """Return `(el, start, end)` for a reference link.

        `(None, None, None)` is returned when the text or id cannot be
        parsed; `(None, start, end)` when the id is not a known reference.

        """
        text, position, ok = self.getText(data, m.end(0))
        if not ok:
            return None, None, None

        ref_id, end, ok = self.evalId(data, position, text)
        if not ok:
            return None, None, None

        # Clean up linebreaks in id
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)

        start = m.start(0)
        if ref_id not in self.md.references:  # ignore undefined refs
            return None, start, end

        href, title = self.md.references[ref_id]
        return self.makeTag(href, title, text), start, end

    def evalId(self, data, index, text):
        """
        Evaluate the id portion of [ref][id].

        If [ref][] use [ref].

        Returns `(id, end, handled)`; the id is lower-cased.
        """
        found = self.RE_LINK.match(data, pos=index)
        if not found:
            return None, index, False

        # An empty id falls back to the link text.
        ref_id = found.group(1).lower() or text.lower()
        return ref_id, found.end(0), True

    def makeTag(self, href, title, text):
        """Return an `<a>` element; the title is only set when it is truthy."""
        link = etree.Element('a')
        link.set('href', href)
        if title:
            link.set('title', title)
        link.text = text
        return link
|
||||
|
||||
|
||||
class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
    """Short form of reference: [google]. """

    def evalId(self, data, index, text):
        """Use the lower-cased link text itself as the id of [ref].

        Nothing further is read from `data`, so `index` is returned unchanged.
        """
        ref_id = text.lower()
        return ref_id, index, True
|
||||
|
||||
|
||||
class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
    """ Resolve `![alt][id]` against the stored references and return an img. """

    def makeTag(self, href, title, text):
        """Return an `<img>` element; the title is only set when it is truthy."""
        image = etree.Element("img")
        image.set("src", href)
        if title:
            image.set("title", title)
        # The alt text has its inline placeholders resolved.
        image.set("alt", self.unescape(text))
        return image
|
||||
|
||||
|
||||
class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor):
    """ Short form of image reference: ![ref]. """

    def evalId(self, data, index, text):
        """Use the lower-cased alt text itself as the id of ![ref].

        Nothing further is read from `data`, so `index` is returned unchanged.
        """
        ref_id = text.lower()
        return ref_id, index, True
|
||||
|
||||
|
||||
class AutolinkInlineProcessor(InlineProcessor):
    """ Build a link Element from an autolink (`<http://example/com>`). """

    def handleMatch(self, m, data):
        start, end = m.span(0)
        url = m.group(1)
        link = etree.Element("a")
        # The href has placeholders resolved; the visible text is the raw
        # match wrapped in an AtomicString.
        link.set('href', self.unescape(url))
        link.text = util.AtomicString(url)
        return link, start, end
|
||||
|
||||
|
||||
class AutomailInlineProcessor(InlineProcessor):
    """
    Build a mailto link Element from an automail link (`<foo@example.com>`).

    Both the visible address and the href are written as character
    entities rather than as plain characters.

    """

    def handleMatch(self, m, data):
        start, end = m.span(0)
        link = etree.Element('a')

        address = self.unescape(m.group(1))
        scheme = "mailto:"
        if address.startswith("mailto:"):
            address = address[len("mailto:"):]

        def as_entity(code):
            """Return entity definition by code, or the code if not defined."""
            name = entities.codepoint2name.get(code)
            if not name:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "{}{};".format(util.AMP_SUBSTITUTE, name)

        # Visible text: a named entity where one exists, numeric otherwise.
        link.text = util.AtomicString(
            ''.join([as_entity(ord(char)) for char in address])
        )

        # href: every character of "mailto:<address>" as a numeric entity.
        target = scheme + address
        encoded = [util.AMP_SUBSTITUTE + '#%d;' % ord(char) for char in target]
        link.set('href', "".join(encoded))

        return link, start, end
|
Reference in New Issue
Block a user