[extractor/youtube] Detect `lazy-load-for-videos` embeds

Closes #4812
This commit is contained in:
pukkandan 2022-09-02 01:28:56 +05:30
parent adba24d207
commit 7c6eb424d3
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
1 changed file with 20 additions and 13 deletions

View File

@@ -923,19 +923,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:\#|$)""" % { (?:\#|$)""" % {
'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
} }
_EMBED_REGEX = [r'''(?x) _EMBED_REGEX = [
(?: r'''(?x)
<iframe[^>]+?src=| (?:
data-video-url=| <iframe[^>]+?src=|
<embed[^>]+?src=| data-video-url=|
embedSWF\(?:\s*| <embed[^>]+?src=|
<object[^>]+data=| embedSWF\(?:\s*|
new\s+SWFObject\( <object[^>]+data=|
) new\s+SWFObject\(
(["\']) )
(?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ (["\'])
(?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
\1'''] (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
\1''',
# https://wordpress.org/plugins/lazy-load-for-videos/
r'''(?xs)
<a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
\s[^>]*\bclass="[^"]*\blazy-load-youtube''',
]
_PLAYER_INFO_RE = ( _PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',