mirror of https://github.com/yt-dlp/yt-dlp.git
[extractor/youtube] Fix initial player response extraction
Authored by: pukkandan, coletdjnz
This commit is contained in:
parent ee164987c7
commit ee27297f82
|
@ -1033,11 +1033,19 @@ class InfoExtractor:
|
||||||
expected_status=expected_status)
|
expected_status=expected_status)
|
||||||
return res if res is False else res[0]
|
return res if res is False else res[0]
|
||||||
|
|
||||||
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, lenient=False):
|
||||||
if transform_source:
|
if transform_source:
|
||||||
json_string = transform_source(json_string)
|
json_string = transform_source(json_string)
|
||||||
try:
|
try:
|
||||||
return json.loads(json_string, strict=False)
|
try:
|
||||||
|
return json.loads(json_string, strict=False)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
if not lenient:
|
||||||
|
raise
|
||||||
|
try:
|
||||||
|
return json.loads(json_string[:e.pos], strict=False)
|
||||||
|
except ValueError:
|
||||||
|
raise e
|
||||||
except ValueError as ve:
|
except ValueError as ve:
|
||||||
errmsg = '%s: Failed to parse JSON ' % video_id
|
errmsg = '%s: Failed to parse JSON ' % video_id
|
||||||
if fatal:
|
if fatal:
|
||||||
|
|
|
@ -397,8 +397,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
if self._LOGIN_REQUIRED and not self._cookies_passed:
|
if self._LOGIN_REQUIRED and not self._cookies_passed:
|
||||||
self.raise_login_required('Login details are needed to download this content', method='cookies')
|
self.raise_login_required('Login details are needed to download this content', method='cookies')
|
||||||
|
|
||||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+})\s*;'
|
||||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+})\s*;'
|
||||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||||
|
|
||||||
def _get_default_ytcfg(self, client='web'):
|
def _get_default_ytcfg(self, client='web'):
|
||||||
|
@ -2212,28 +2212,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
# Story. Requires specific player params to work.
|
# Story. Requires specific player params to work.
|
||||||
# Note: stories get removed after some period of time
|
# Note: stories get removed after some period of time
|
||||||
'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
|
'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'yN3x1t3sieA',
|
'id': 'vv8qTUWmulI',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'uploader': 'Linus Tech Tips',
|
'availability': 'unlisted',
|
||||||
'duration': 13,
|
'view_count': int,
|
||||||
'channel': 'Linus Tech Tips',
|
'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
|
||||||
'playable_in_embed': True,
|
'upload_date': '20220526',
|
||||||
'tags': [],
|
'categories': ['Education'],
|
||||||
'age_limit': 0,
|
|
||||||
'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
|
|
||||||
'upload_date': '20220402',
|
|
||||||
'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
|
|
||||||
'title': 'Story',
|
'title': 'Story',
|
||||||
|
'channel': 'IT\'S HISTORY',
|
||||||
|
'description': '',
|
||||||
|
'uploader_id': 'BlastfromthePast',
|
||||||
|
'duration': 12,
|
||||||
|
'uploader': 'IT\'S HISTORY',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'age_limit': 0,
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
'uploader_id': 'LinusTechTips',
|
'tags': [],
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
|
||||||
|
'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'tjjjtzRLHvA',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'ハッシュタグ無し };if window.ytcsi',
|
||||||
|
'upload_date': '20220323',
|
||||||
|
'like_count': int,
|
||||||
|
'availability': 'unlisted',
|
||||||
|
'channel': 'nao20010128nao',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
|
||||||
|
'age_limit': 0,
|
||||||
|
'uploader': 'nao20010128nao',
|
||||||
|
'uploader_id': 'nao20010128nao',
|
||||||
|
'categories': ['Music'],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'description': '',
|
'description': '',
|
||||||
'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
|
'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
|
||||||
'categories': ['Science & Technology'],
|
'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
|
'live_status': 'not_live',
|
||||||
'availability': 'unlisted',
|
'playable_in_embed': True,
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'duration': 6,
|
||||||
|
'tags': [],
|
||||||
|
'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -2754,7 +2780,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
|
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
|
||||||
return self._parse_json(self._search_regex(
|
return self._parse_json(self._search_regex(
|
||||||
(fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
|
(fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
|
||||||
regex), webpage, name, default='{}'), video_id, fatal=False)
|
regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True)
|
||||||
|
|
||||||
def _extract_comment(self, comment_renderer, parent=None):
|
def _extract_comment(self, comment_renderer, parent=None):
|
||||||
comment_id = comment_renderer.get('commentId')
|
comment_id = comment_renderer.get('commentId')
|
||||||
|
|
Loading…
Reference in New Issue