[extractor/common] Allow HTML5 unquoted attribute values
Fixes #7108. HTML5 allows unquoted attribute values. See the "Unquoted attribute value syntax" section [1] for more information. [1] http://www.w3.org/TR/html5/syntax.html
This commit is contained in:
parent
54a5428518
commit
57935b2564
|
@@ -646,7 +646,7 @@ class InfoExtractor(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
|
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\')'
|
||||||
property_re = r'(?:name|property)=[\'"]og:%s[\'"]' % re.escape(prop)
|
property_re = r'(?:name|property)=[\'"]?og:%s[\'"]?' % re.escape(prop)
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
return [
|
return [
|
||||||
template % (property_re, content_re),
|
template % (property_re, content_re),
|
||||||
|
|
Loading…
Reference in New Issue