mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-10 14:40:04 +01:00
Merge remote-tracking branch 'origin/master' into pr/8968
This commit is contained in:
commit
6c5c7d378e
@ -690,7 +690,6 @@ class YoutubeDL:
|
|||||||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||||
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
||||||
self.params['http_headers'].pop('Cookie', None)
|
self.params['http_headers'].pop('Cookie', None)
|
||||||
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
|
|
||||||
|
|
||||||
if auto_init and auto_init != 'no_verbose_header':
|
if auto_init and auto_init != 'no_verbose_header':
|
||||||
self.print_debug_header()
|
self.print_debug_header()
|
||||||
@ -964,6 +963,7 @@ class YoutubeDL:
|
|||||||
def close(self):
|
def close(self):
|
||||||
self.save_cookies()
|
self.save_cookies()
|
||||||
self._request_director.close()
|
self._request_director.close()
|
||||||
|
del self._request_director
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None, is_error=True):
|
def trouble(self, message=None, tb=None, is_error=True):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
@ -4160,6 +4160,10 @@ class YoutubeDL:
|
|||||||
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
|
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
|
||||||
return director
|
return director
|
||||||
|
|
||||||
|
@functools.cached_property
|
||||||
|
def _request_director(self):
|
||||||
|
return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
|
||||||
|
|
||||||
def encode(self, s):
|
def encode(self, s):
|
||||||
if isinstance(s, bytes):
|
if isinstance(s, bytes):
|
||||||
return s # Already encoded
|
return s # Already encoded
|
||||||
|
@ -300,7 +300,7 @@ class ArchiveOrgIE(InfoExtractor):
|
|||||||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||||
entry['formats'].append({
|
entry['formats'].append({
|
||||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
|
||||||
'format': f.get('format'),
|
'format': f.get('format'),
|
||||||
'width': int_or_none(f.get('width')),
|
'width': int_or_none(f.get('width')),
|
||||||
'height': int_or_none(f.get('height')),
|
'height': int_or_none(f.get('height')),
|
||||||
|
@ -4,27 +4,25 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class CloudflareStreamIE(InfoExtractor):
|
class CloudflareStreamIE(InfoExtractor):
|
||||||
|
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||||
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
|
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||||
https?://
|
_EMBED_REGEX = [
|
||||||
(?:
|
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||||
(?:watch\.)?%s/|
|
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||||
%s
|
]
|
||||||
)
|
|
||||||
(?P<id>%s)
|
|
||||||
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
|
|
||||||
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||||
|
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||||
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
|
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||||
|
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
|
|||||||
'duration': 172,
|
'duration': 172,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||||
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||||
@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
_VIDEO_ID_REGEXES = [
|
_VIDEO_ID_REGEXES = [
|
||||||
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
|
r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
|
||||||
|
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
|
||||||
r'<video embed=[^>]+><id>(\d+)</id>',
|
r'<video embed=[^>]+><id>(\d+)</id>',
|
||||||
r'<video restriction[^>]+><key>(\d+)</key>',
|
r'<video restriction[^>]+><key>(\d+)</key>',
|
||||||
]
|
]
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none, traverse_obj
|
from ..utils import ExtractorError, int_or_none, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class SwearnetEpisodeIE(InfoExtractor):
|
class SwearnetEpisodeIE(InfoExtractor):
|
||||||
@ -51,7 +51,13 @@ class SwearnetEpisodeIE(InfoExtractor):
|
|||||||
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
try:
|
||||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||||
|
except ExtractorError:
|
||||||
|
if 'Upgrade Now' in webpage:
|
||||||
|
self.raise_login_required()
|
||||||
|
raise
|
||||||
|
|
||||||
json_data = self._download_json(
|
json_data = self._download_json(
|
||||||
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
|
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
|
||||||
|
|
||||||
|
@ -48,17 +48,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
return url, data, headers
|
return url, data, headers
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
webpage = self._download_webpage(
|
viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
|
||||||
self._LOGIN_URL, None, 'Downloading login page')
|
|
||||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
|
||||||
data = {
|
data = {
|
||||||
'action': 'login',
|
'action': 'login',
|
||||||
'email': username,
|
'email': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
'service': 'vimeo',
|
'service': 'vimeo',
|
||||||
'token': token,
|
'token': viewer['xsrft'],
|
||||||
}
|
}
|
||||||
self._set_vimeo_cookie('vuid', vuid)
|
self._set_vimeo_cookie('vuid', viewer['vuid'])
|
||||||
try:
|
try:
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
self._LOGIN_URL, None, 'Logging in',
|
self._LOGIN_URL, None, 'Logging in',
|
||||||
|
@ -3640,15 +3640,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
return orderedSet(requested_clients)
|
return orderedSet(requested_clients)
|
||||||
|
|
||||||
|
def _invalid_player_response(self, pr, video_id):
|
||||||
|
# YouTube may return a different video player response than expected.
|
||||||
|
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
|
||||||
|
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
|
||||||
|
return pr_id
|
||||||
|
|
||||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
||||||
initial_pr = None
|
initial_pr = None
|
||||||
if webpage:
|
if webpage:
|
||||||
initial_pr = self._search_json(
|
initial_pr = self._search_json(
|
||||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
|
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
|
||||||
|
|
||||||
|
prs = []
|
||||||
|
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
|
||||||
|
# Android player_response does not have microFormats which are needed for
|
||||||
|
# extraction of some data. So we return the initial_pr with formats
|
||||||
|
# stripped out even if not requested by the user
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||||
|
prs.append({**initial_pr, 'streamingData': None})
|
||||||
|
|
||||||
all_clients = set(clients)
|
all_clients = set(clients)
|
||||||
clients = clients[::-1]
|
clients = clients[::-1]
|
||||||
prs = []
|
|
||||||
|
|
||||||
def append_client(*client_names):
|
def append_client(*client_names):
|
||||||
""" Append the first client name that exists but not already used """
|
""" Append the first client name that exists but not already used """
|
||||||
@ -3660,18 +3673,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
all_clients.add(actual_client)
|
all_clients.add(actual_client)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Android player_response does not have microFormats which are needed for
|
|
||||||
# extraction of some data. So we return the initial_pr with formats
|
|
||||||
# stripped out even if not requested by the user
|
|
||||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
|
||||||
if initial_pr:
|
|
||||||
pr = dict(initial_pr)
|
|
||||||
pr['streamingData'] = None
|
|
||||||
prs.append(pr)
|
|
||||||
|
|
||||||
last_error = None
|
|
||||||
tried_iframe_fallback = False
|
tried_iframe_fallback = False
|
||||||
player_url = None
|
player_url = None
|
||||||
|
skipped_clients = {}
|
||||||
while clients:
|
while clients:
|
||||||
client, base_client, variant = _split_innertube_client(clients.pop())
|
client, base_client, variant = _split_innertube_client(clients.pop())
|
||||||
player_ytcfg = master_ytcfg if client == 'web' else {}
|
player_ytcfg = master_ytcfg if client == 'web' else {}
|
||||||
@ -3692,19 +3696,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
|
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
|
||||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
|
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if last_error:
|
self.report_warning(e)
|
||||||
self.report_warning(last_error)
|
|
||||||
last_error = e
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if pr:
|
if pr_id := self._invalid_player_response(pr, video_id):
|
||||||
# YouTube may return a different video player response than expected.
|
skipped_clients[client] = pr_id
|
||||||
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
|
elif pr:
|
||||||
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
|
|
||||||
if pr_video_id and pr_video_id != video_id:
|
|
||||||
self.report_warning(
|
|
||||||
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
|
|
||||||
else:
|
|
||||||
# Save client name for introspection later
|
# Save client name for introspection later
|
||||||
name = short_client_name(client)
|
name = short_client_name(client)
|
||||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||||
@ -3722,10 +3719,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
elif not variant:
|
elif not variant:
|
||||||
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
|
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
|
||||||
|
|
||||||
if last_error:
|
if skipped_clients:
|
||||||
if not len(prs):
|
self.report_warning(
|
||||||
raise last_error
|
f'Skipping player responses from {"/".join(skipped_clients)} clients '
|
||||||
self.report_warning(last_error)
|
f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
|
||||||
|
if not prs:
|
||||||
|
raise ExtractorError(
|
||||||
|
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
|
||||||
|
elif not prs:
|
||||||
|
raise ExtractorError('Failed to extract any player response')
|
||||||
return prs, player_url
|
return prs, player_url
|
||||||
|
|
||||||
def _needs_live_processing(self, live_status, duration):
|
def _needs_live_processing(self, live_status, duration):
|
||||||
|
@ -68,6 +68,7 @@ class RequestDirector:
|
|||||||
def close(self):
|
def close(self):
|
||||||
for handler in self.handlers.values():
|
for handler in self.handlers.values():
|
||||||
handler.close()
|
handler.close()
|
||||||
|
self.handlers = {}
|
||||||
|
|
||||||
def add_handler(self, handler: RequestHandler):
|
def add_handler(self, handler: RequestHandler):
|
||||||
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
|
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user