[youtube:tab] Show unavailable videos in playlists (#242)

Closes #231

Authored by: colethedj
This commit is contained in:
coletdjnz 2021-04-16 22:39:08 +00:00 committed by GitHub
parent a7191c6f57
commit 358de58c4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 71 additions and 5 deletions

View File

@ -29,6 +29,7 @@ from ..utils import (
clean_html, clean_html,
dict_get, dict_get,
datetime_from_str, datetime_from_str,
error_to_compat_str,
ExtractorError, ExtractorError,
format_field, format_field,
float_or_none, float_or_none,
@ -2628,6 +2629,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
}, },
'playlist_mincount': 21, 'playlist_mincount': 21,
}, {
'note': 'Playlist with "show unavailable videos" button',
'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
'info_dict': {
'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
'uploader': 'Phim Siêu Nhân Nhật Bản',
'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
},
'playlist_mincount': 1400,
'expected_warnings': [
'YouTube said: INFO - Unavailable videos are hidden',
]
}, { }, {
# https://github.com/ytdl-org/youtube-dl/issues/21844 # https://github.com/ytdl-org/youtube-dl/issues/21844
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
@ -3293,8 +3307,49 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if errors: if errors:
raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
def _reload_with_unavailable_videos(self, item_id, data, webpage):
"""
Get playlist with unavailable videos if the 'show unavailable videos' button exists.
"""
sidebar_renderer = try_get(
data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
for item in sidebar_renderer:
if not isinstance(item, dict):
continue
renderer = item.get('playlistSidebarPrimaryInfoRenderer')
menu_renderer = try_get(
renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
for menu_item in menu_renderer:
if not isinstance(menu_item, dict):
continue
nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
text = try_get(
nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
if not text or text.lower() != 'show unavailable videos':
continue
browse_endpoint = try_get(
nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
browse_id = browse_endpoint.get('browseId')
params = browse_endpoint.get('params')
if not browse_id or not params:
return
ytcfg = self._extract_ytcfg(item_id, webpage)
headers = self._generate_api_headers(
ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
identity_token=self._extract_identity_token(webpage, item_id=item_id),
visitor_data=try_get(
self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
query = {
'params': params,
'browseId': browse_id
}
return self._extract_response(
item_id=item_id, headers=headers, query=query,
check_get_keys='contents', fatal=False,
note='Downloading API JSON with unavailable videos')
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse'): ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
response = None response = None
last_error = None last_error = None
count = -1 count = -1
@ -3308,8 +3363,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
try: try:
response = self._call_api( response = self._call_api(
ep=ep, fatal=True, headers=headers, ep=ep, fatal=True, headers=headers,
video_id=item_id, video_id=item_id, query=query,
query=query,
context=self._extract_context(ytcfg), context=self._extract_context(ytcfg),
api_key=self._extract_api_key(ytcfg), api_key=self._extract_api_key(ytcfg),
note='%s%s' % (note, ' (retry #%d)' % count if count else '')) note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
@ -3320,7 +3374,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
last_error = 'HTTP Error %s' % e.cause.code last_error = 'HTTP Error %s' % e.cause.code
if count < retries: if count < retries:
continue continue
raise if fatal:
raise
else:
self.report_warning(error_to_compat_str(e))
return
else: else:
# Youtube may send alerts if there was an issue with the continuation page # Youtube may send alerts if there was an issue with the continuation page
self._extract_alerts(response, expected=False) self._extract_alerts(response, expected=False)
@ -3330,7 +3389,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# See: https://github.com/ytdl-org/youtube-dl/issues/28194 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
last_error = 'Incomplete data received' last_error = 'Incomplete data received'
if count >= retries: if count >= retries:
self._downloader.report_error(last_error) if fatal:
raise ExtractorError(last_error)
else:
self.report_warning(last_error)
return
return response return response
def _extract_webpage(self, url, item_id): def _extract_webpage(self, url, item_id):
@ -3389,6 +3452,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
webpage, data = self._extract_webpage(url, item_id) webpage, data = self._extract_webpage(url, item_id)
# YouTube sometimes provides a button to reload playlist with unavailable videos.
data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
tabs = try_get( tabs = try_get(
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
if tabs: if tabs: