mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-10 11:00:07 +01:00
Merge remote-tracking branch 'origin/master' into pr/8968
This commit is contained in:
commit
6c5c7d378e
@ -690,7 +690,6 @@ class YoutubeDL:
|
||||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
||||
self.params['http_headers'].pop('Cookie', None)
|
||||
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
|
||||
|
||||
if auto_init and auto_init != 'no_verbose_header':
|
||||
self.print_debug_header()
|
||||
@ -964,6 +963,7 @@ class YoutubeDL:
|
||||
def close(self):
|
||||
self.save_cookies()
|
||||
self._request_director.close()
|
||||
del self._request_director
|
||||
|
||||
def trouble(self, message=None, tb=None, is_error=True):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@ -4160,6 +4160,10 @@ class YoutubeDL:
|
||||
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
|
||||
return director
|
||||
|
||||
@functools.cached_property
|
||||
def _request_director(self):
|
||||
return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
|
||||
|
||||
def encode(self, s):
|
||||
if isinstance(s, bytes):
|
||||
return s # Already encoded
|
||||
|
@ -300,7 +300,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
|
||||
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
|
||||
entry['formats'].append({
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
|
||||
'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
|
||||
'format': f.get('format'),
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
|
@ -4,27 +4,25 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
|
||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:watch\.)?%s/|
|
||||
%s
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
|
||||
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
||||
'info_dict': {
|
||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||
'ext': 'mp4',
|
||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||
'info_dict': {
|
||||
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'ext': 'mp4',
|
||||
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
|
||||
'duration': 172,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
|
||||
'md5': '82dbd49b38e3af1d00df16acbeab260c',
|
||||
@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
_VIDEO_ID_REGEXES = [
|
||||
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
|
||||
r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
|
||||
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
|
||||
r'<video embed=[^>]+><id>(\d+)</id>',
|
||||
r'<video restriction[^>]+><key>(\d+)</key>',
|
||||
]
|
||||
|
@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
from ..utils import ExtractorError, int_or_none, traverse_obj
|
||||
|
||||
|
||||
class SwearnetEpisodeIE(InfoExtractor):
|
||||
@ -51,7 +51,13 @@ class SwearnetEpisodeIE(InfoExtractor):
|
||||
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||
try:
|
||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||
except ExtractorError:
|
||||
if 'Upgrade Now' in webpage:
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
json_data = self._download_json(
|
||||
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
|
||||
|
||||
|
@ -48,17 +48,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
return url, data, headers
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
webpage = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
||||
viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
|
||||
data = {
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
'password': password,
|
||||
'service': 'vimeo',
|
||||
'token': token,
|
||||
'token': viewer['xsrft'],
|
||||
}
|
||||
self._set_vimeo_cookie('vuid', vuid)
|
||||
self._set_vimeo_cookie('vuid', viewer['vuid'])
|
||||
try:
|
||||
self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Logging in',
|
||||
|
@ -3640,15 +3640,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
return orderedSet(requested_clients)
|
||||
|
||||
def _invalid_player_response(self, pr, video_id):
|
||||
# YouTube may return a different video player response than expected.
|
||||
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
|
||||
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
|
||||
return pr_id
|
||||
|
||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
||||
initial_pr = None
|
||||
if webpage:
|
||||
initial_pr = self._search_json(
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
|
||||
|
||||
prs = []
|
||||
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
prs.append({**initial_pr, 'streamingData': None})
|
||||
|
||||
all_clients = set(clients)
|
||||
clients = clients[::-1]
|
||||
prs = []
|
||||
|
||||
def append_client(*client_names):
|
||||
""" Append the first client name that exists but not already used """
|
||||
@ -3660,18 +3673,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
all_clients.add(actual_client)
|
||||
return
|
||||
|
||||
# Android player_response does not have microFormats which are needed for
|
||||
# extraction of some data. So we return the initial_pr with formats
|
||||
# stripped out even if not requested by the user
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/501
|
||||
if initial_pr:
|
||||
pr = dict(initial_pr)
|
||||
pr['streamingData'] = None
|
||||
prs.append(pr)
|
||||
|
||||
last_error = None
|
||||
tried_iframe_fallback = False
|
||||
player_url = None
|
||||
skipped_clients = {}
|
||||
while clients:
|
||||
client, base_client, variant = _split_innertube_client(clients.pop())
|
||||
player_ytcfg = master_ytcfg if client == 'web' else {}
|
||||
@ -3692,26 +3696,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
|
||||
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
|
||||
except ExtractorError as e:
|
||||
if last_error:
|
||||
self.report_warning(last_error)
|
||||
last_error = e
|
||||
self.report_warning(e)
|
||||
continue
|
||||
|
||||
if pr:
|
||||
# YouTube may return a different video player response than expected.
|
||||
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
|
||||
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
|
||||
if pr_video_id and pr_video_id != video_id:
|
||||
self.report_warning(
|
||||
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
|
||||
else:
|
||||
# Save client name for introspection later
|
||||
name = short_client_name(client)
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = name
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = name
|
||||
prs.append(pr)
|
||||
if pr_id := self._invalid_player_response(pr, video_id):
|
||||
skipped_clients[client] = pr_id
|
||||
elif pr:
|
||||
# Save client name for introspection later
|
||||
name = short_client_name(client)
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = name
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = name
|
||||
prs.append(pr)
|
||||
|
||||
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
|
||||
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
|
||||
@ -3722,10 +3719,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
elif not variant:
|
||||
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
|
||||
|
||||
if last_error:
|
||||
if not len(prs):
|
||||
raise last_error
|
||||
self.report_warning(last_error)
|
||||
if skipped_clients:
|
||||
self.report_warning(
|
||||
f'Skipping player responses from {"/".join(skipped_clients)} clients '
|
||||
f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
|
||||
if not prs:
|
||||
raise ExtractorError(
|
||||
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
|
||||
elif not prs:
|
||||
raise ExtractorError('Failed to extract any player response')
|
||||
return prs, player_url
|
||||
|
||||
def _needs_live_processing(self, live_status, duration):
|
||||
|
@ -68,6 +68,7 @@ class RequestDirector:
|
||||
def close(self):
|
||||
for handler in self.handlers.values():
|
||||
handler.close()
|
||||
self.handlers = {}
|
||||
|
||||
def add_handler(self, handler: RequestHandler):
|
||||
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user