1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-10 10:40:11 +01:00

Merge remote-tracking branch 'origin/master' into pr/8968

This commit is contained in:
pukkandan 2024-02-25 06:11:58 +05:30
commit 6c5c7d378e
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
8 changed files with 80 additions and 54 deletions

View File

@ -690,7 +690,6 @@ class YoutubeDL:
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers')) self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
self.params['http_headers'].pop('Cookie', None) self.params['http_headers'].pop('Cookie', None)
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
if auto_init and auto_init != 'no_verbose_header': if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header() self.print_debug_header()
@ -964,6 +963,7 @@ class YoutubeDL:
def close(self): def close(self):
self.save_cookies() self.save_cookies()
self._request_director.close() self._request_director.close()
del self._request_director
def trouble(self, message=None, tb=None, is_error=True): def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears. """Determine action to take when a download problem appears.
@ -4160,6 +4160,10 @@ class YoutubeDL:
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0) director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
return director return director
@functools.cached_property
def _request_director(self):
return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
def encode(self, s): def encode(self, s):
if isinstance(s, bytes): if isinstance(s, bytes):
return s # Already encoded return s # Already encoded

View File

@ -300,7 +300,7 @@ class ArchiveOrgIE(InfoExtractor):
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig')) is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in): if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
entry['formats'].append({ entry['formats'].append({
'url': 'https://archive.org/download/' + identifier + '/' + f['name'], 'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
'format': f.get('format'), 'format': f.get('format'),
'width': int_or_none(f.get('width')), 'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')), 'height': int_or_none(f.get('height')),

View File

@ -4,27 +4,25 @@ from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor): class CloudflareStreamIE(InfoExtractor):
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = r'''(?x) _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
https?:// _EMBED_REGEX = [
(?: rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
(?:watch\.)?%s/| rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
%s ]
)
(?P<id>%s)
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': { 'info_dict': {
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': 'm3u8',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
}, { }, {
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
'info_dict': {
'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
},
'params': {
'skip_download': 'm3u8',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
'duration': 172, 'duration': 172,
'view_count': int, 'view_count': int,
}, },
'skip': '404 Not Found',
}, { }, {
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
'md5': '82dbd49b38e3af1d00df16acbeab260c', 'md5': '82dbd49b38e3af1d00df16acbeab260c',
@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
}] }]
_VIDEO_ID_REGEXES = [ _VIDEO_ID_REGEXES = [
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)', r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
r'<video embed=[^>]+><id>(\d+)</id>', r'<video embed=[^>]+><id>(\d+)</id>',
r'<video restriction[^>]+><key>(\d+)</key>', r'<video restriction[^>]+><key>(\d+)</key>',
] ]

View File

@ -1,5 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj from ..utils import ExtractorError, int_or_none, traverse_obj
class SwearnetEpisodeIE(InfoExtractor): class SwearnetEpisodeIE(InfoExtractor):
@ -51,7 +51,13 @@ class SwearnetEpisodeIE(InfoExtractor):
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num') display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid') try:
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
except ExtractorError:
if 'Upgrade Now' in webpage:
self.raise_login_required()
raise
json_data = self._download_json( json_data = self._download_json(
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0] f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]

View File

@ -48,17 +48,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
return url, data, headers return url, data, headers
def _perform_login(self, username, password): def _perform_login(self, username, password):
webpage = self._download_webpage( viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
self._LOGIN_URL, None, 'Downloading login page')
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = { data = {
'action': 'login', 'action': 'login',
'email': username, 'email': username,
'password': password, 'password': password,
'service': 'vimeo', 'service': 'vimeo',
'token': token, 'token': viewer['xsrft'],
} }
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', viewer['vuid'])
try: try:
self._download_webpage( self._download_webpage(
self._LOGIN_URL, None, 'Logging in', self._LOGIN_URL, None, 'Logging in',

View File

@ -3640,15 +3640,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return orderedSet(requested_clients) return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
# YouTube may return a different video player response than expected.
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
return pr_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None initial_pr = None
if webpage: if webpage:
initial_pr = self._search_json( initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
prs = []
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
prs.append({**initial_pr, 'streamingData': None})
all_clients = set(clients) all_clients = set(clients)
clients = clients[::-1] clients = clients[::-1]
prs = []
def append_client(*client_names): def append_client(*client_names):
""" Append the first client name that exists but not already used """ """ Append the first client name that exists but not already used """
@ -3660,18 +3673,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
all_clients.add(actual_client) all_clients.add(actual_client)
return return
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
if initial_pr:
pr = dict(initial_pr)
pr['streamingData'] = None
prs.append(pr)
last_error = None
tried_iframe_fallback = False tried_iframe_fallback = False
player_url = None player_url = None
skipped_clients = {}
while clients: while clients:
client, base_client, variant = _split_innertube_client(clients.pop()) client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {} player_ytcfg = master_ytcfg if client == 'web' else {}
@ -3692,26 +3696,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e: except ExtractorError as e:
if last_error: self.report_warning(e)
self.report_warning(last_error)
last_error = e
continue continue
if pr: if pr_id := self._invalid_player_response(pr, video_id):
# YouTube may return a different video player response than expected. skipped_clients[client] = pr_id
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713 elif pr:
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId')) # Save client name for introspection later
if pr_video_id and pr_video_id != video_id: name = short_client_name(client)
self.report_warning( sd = traverse_obj(pr, ('streamingData', {dict})) or {}
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message()) sd[STREAMING_DATA_CLIENT_NAME] = name
else: for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
# Save client name for introspection later f[STREAMING_DATA_CLIENT_NAME] = name
name = short_client_name(client) prs.append(pr)
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
sd[STREAMING_DATA_CLIENT_NAME] = name
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = name
prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
@ -3722,10 +3719,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif not variant: elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
if last_error: if skipped_clients:
if not len(prs): self.report_warning(
raise last_error f'Skipping player responses from {"/".join(skipped_clients)} clients '
self.report_warning(last_error) f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
if not prs:
raise ExtractorError(
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
elif not prs:
raise ExtractorError('Failed to extract any player response')
return prs, player_url return prs, player_url
def _needs_live_processing(self, live_status, duration): def _needs_live_processing(self, live_status, duration):

View File

@ -68,6 +68,7 @@ class RequestDirector:
def close(self): def close(self):
for handler in self.handlers.values(): for handler in self.handlers.values():
handler.close() handler.close()
self.handlers = {}
def add_handler(self, handler: RequestHandler): def add_handler(self, handler: RequestHandler):
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it""" """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""