mirror of https://github.com/yt-dlp/yt-dlp.git

[ie/youtube] Add `po_token`, `visitor_data`, `data_sync_id` extractor args (#10648)

Authored by: seproDev, coletdjnz, bashonly

commit 3a3bd00037
parent d1c4d88b2d
README.md
@@ -1777,6 +1777,9 @@ The following extractors use this feature:
 * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
 * `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
 * `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
+* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
+* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
+* `po_token`: Proof of Origin (PO) Token(s) to use for requesting video playback. Comma-separated list of PO Tokens in the format `CLIENT+PO_TOKEN`, e.g. `youtube:po_token=web+XXX,android+YYY`
 
 #### youtubetab (YouTube playlists, channels, feeds, etc.)
 * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
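Illustrative only: a minimal sketch of how the new arguments could be passed, either on the command line or through the embedding API. The token, visitor and data-sync values below are placeholders, not real values.

```python
import yt_dlp

# CLI equivalent (single extractor-args string, arguments separated by ';'):
#   yt-dlp --extractor-args "youtube:po_token=web+XXX,android+YYY;visitor_data=CgtFb28" URL
opts = {
    'extractor_args': {
        'youtube': {
            'po_token': ['web+XXX', 'android+YYY'],  # CLIENT+PO_TOKEN pairs
            'visitor_data': ['CgtFb28'],             # overrides visitorData (placeholder)
            'data_sync_id': ['XXXX||'],              # account Data Sync ID (placeholder)
        },
    },
}

with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```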
yt_dlp/extractor/youtube.py
@@ -69,6 +69,8 @@ from ..utils import (
 )
 
 STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
+STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token'
+
 # any clients starting with _ cannot be explicitly requested by the user
 INNERTUBE_CLIENTS = {
     'web': {
@@ -79,6 +81,7 @@ INNERTUBE_CLIENTS = {
             },
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
+        'REQUIRE_PO_TOKEN': True,
     },
     # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
     'web_safari': {
@@ -90,6 +93,7 @@ INNERTUBE_CLIENTS = {
             },
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
+        'REQUIRE_PO_TOKEN': True,
     },
     'web_embedded': {
         'INNERTUBE_CONTEXT': {
@@ -132,6 +136,7 @@ INNERTUBE_CLIENTS = {
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
         'REQUIRE_JS_PLAYER': False,
+        'REQUIRE_PO_TOKEN': True,
     },
     'android_music': {
         'INNERTUBE_CONTEXT': {
@@ -146,6 +151,7 @@ INNERTUBE_CLIENTS = {
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
         'REQUIRE_JS_PLAYER': False,
+        'REQUIRE_PO_TOKEN': True,
     },
     'android_creator': {
         'INNERTUBE_CONTEXT': {
@@ -160,6 +166,7 @@ INNERTUBE_CLIENTS = {
         },
         'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
         'REQUIRE_JS_PLAYER': False,
+        'REQUIRE_PO_TOKEN': True,
     },
     # YouTube Kids videos aren't returned on this client for some reason
     'android_vr': {
@@ -323,6 +330,7 @@ def build_innertube_clients():
     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
+        ytcfg.setdefault('REQUIRE_PO_TOKEN', False)
         ytcfg.setdefault('PLAYER_PARAMS', None)
         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 
@@ -688,31 +696,46 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
             'identity token', default=None, fatal=False)
 
-    @staticmethod
-    def _extract_account_syncid(*args):
+    def _data_sync_id_to_delegated_session_id(self, data_sync_id):
+        if not data_sync_id:
+            return
+        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
+        # and just "user_syncid||" for primary channel. We only want the channel_syncid
+        channel_syncid, _, user_syncid = data_sync_id.partition('||')
+        if user_syncid:
+            return channel_syncid
+
+    def _extract_account_syncid(self, *args):
         """
-        Extract syncId required to download private playlists of secondary channels
+        Extract current session ID required to download private playlists of secondary channels
         @params response and/or ytcfg
         """
-        for data in args:
-            # ytcfg includes channel_syncid if on secondary channel
-            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
-            if delegated_sid:
-                return delegated_sid
-            sync_ids = (try_get(
-                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
-                       lambda x: x['DATASYNC_ID']), str) or '').split('||')
-            if len(sync_ids) >= 2 and sync_ids[1]:
-                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
-                # and just "user_syncid||" for primary channel. We only want the channel_syncid
-                return sync_ids[0]
+        # ytcfg includes channel_syncid if on secondary channel
+        if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)):
+            return delegated_sid
 
-    @staticmethod
-    def _extract_visitor_data(*args):
+        data_sync_id = self._extract_data_sync_id(*args)
+        return self._data_sync_id_to_delegated_session_id(data_sync_id)
+
+    def _extract_data_sync_id(self, *args):
+        """
+        Extract current account dataSyncId.
+        In the format DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID||
+        @params response and/or ytcfg
+        """
+        if data_sync_id := self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]:
+            return data_sync_id
+
+        return traverse_obj(
+            args, (..., ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId')), {str}, any))
+
+    def _extract_visitor_data(self, *args):
         """
         Extracts visitorData from an API response or ytcfg
         Appears to be used to track session state
         """
+        if visitor_data := self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]:
+            return visitor_data
         return get_first(
             args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
             expected_type=str)
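As the comments in the new helper note, the Data Sync ID has the form `channel_syncid||user_syncid` for a secondary channel and `user_syncid||` for the primary channel. A tiny standalone illustration of the `partition('||')` behaviour the helper relies on (IDs are made up):

```python
def delegated_session_id(data_sync_id):
    # Mirrors the logic of _data_sync_id_to_delegated_session_id above
    channel_syncid, _, user_syncid = data_sync_id.partition('||')
    return channel_syncid if user_syncid else None

print(delegated_session_id('CHANNEL123||USER456'))  # 'CHANNEL123' (secondary channel)
print(delegated_session_id('USER456||'))            # None (primary channel)
```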
@@ -1334,11 +1357,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
     }
     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
-    _POTOKEN_EXPERIMENTS = ('51217476', '51217102')
-    _BROKEN_CLIENTS = {
-        short_client_name(client): client
-        for client in ('android', 'android_creator', 'android_music')
-    }
     _DEFAULT_CLIENTS = ('ios', 'web_creator')
 
     _GEO_BYPASS = False
@@ -3701,6 +3719,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             **cls._get_checkok_params(),
         }
 
+    def _get_config_po_token(self, client):
+        po_token_strs = self._configuration_arg('po_token', [], ie_key=YoutubeIE, casesense=True)
+        for token_str in po_token_strs:
+            po_token_client, sep, po_token = token_str.partition('+')
+            if not sep:
+                self.report_warning(
+                    f'Invalid po_token configuration format. Expected "client+po_token", got "{token_str}"', only_once=True)
+                continue
+            if po_token_client == client:
+                return po_token
+
+    def fetch_po_token(self, client='web', visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
+        # PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
+        if not visitor_data and not self.is_authenticated and player_url:
+            self.report_warning(
+                f'Unable to fetch PO Token for {client} client: Missing required Visitor Data. '
+                f'You may need to pass Visitor Data with --extractor-args "youtube:visitor_data=XXX"')
+            return
+
+        config_po_token = self._get_config_po_token(client)
+        if config_po_token:
+            # PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
+            # if using first channel in an account then we don't need the data_sync_id anymore...
+            if not data_sync_id and self.is_authenticated and player_url:
+                self.report_warning(
+                    f'Got a PO Token for {client} client, but missing Data Sync ID for account. Formats may not work. '
+                    f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
+
+            return config_po_token
+
+        # Require PO Token if logged in for external fetching
+        if not data_sync_id and self.is_authenticated and player_url:
+            self.report_warning(
+                f'Unable to fetch PO Token for {client} client: Missing required Data Sync ID for account. '
+                f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
+            return
+
+        return self._fetch_po_token(
+            client=client,
+            visitor_data=visitor_data,
+            data_sync_id=data_sync_id,
+            player_url=player_url,
+            **kwargs,
+        )
+
+    def _fetch_po_token(self, client, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
+        """External PO Token fetch stub"""
+
     @staticmethod
     def _is_agegated(player_response):
         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
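The new `_fetch_po_token` method is left as an empty stub to be filled in by external code. A minimal, hypothetical sketch of what an override could look like; the `MyYoutubeIE` subclass, the `PO_TOKEN_CACHE` dict and the environment-variable fallback are illustrative assumptions, not part of this commit or any official interface:

```python
import os

from yt_dlp.extractor.youtube import YoutubeIE

# Illustrative token source; a real implementation would obtain tokens from an
# external PO Token provider (e.g. a headless browser or a token-minting service).
PO_TOKEN_CACHE = {'web': os.environ.get('YT_PO_TOKEN_WEB')}


class MyYoutubeIE(YoutubeIE):
    def _fetch_po_token(self, client, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
        # Return a PO Token bound to the current visitor_data/data_sync_id,
        # or None to fall back to yt-dlp's normal (token-less) behaviour.
        return PO_TOKEN_CACHE.get(client)
```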
@@ -3717,13 +3783,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _is_unplayable(player_response):
         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
 
-    def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
-
-        session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
-        syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
-        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
+    def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
         headers = self.generate_api_headers(
-            ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
+            ytcfg=player_ytcfg,
+            default_client=client,
+            visitor_data=visitor_data,
+            session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
+            account_syncid=(
+                self._data_sync_id_to_delegated_session_id(data_sync_id)
+                or self._extract_account_syncid(master_ytcfg, initial_pr, player_ytcfg)
+            ),
+        )
 
         yt_query = {
             'videoId': video_id,
@@ -3734,6 +3804,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]:
             yt_query['params'] = player_params
 
+        if po_token:
+            yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}
+
+        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
         yt_query.update(self._generate_player_context(sts))
         return self._extract_response(
             item_id=video_id, ep='player', query=yt_query,
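For reference, with a PO Token supplied the body of the `/player` Innertube request gains a `serviceIntegrityDimensions` field alongside the existing keys; a rough sketch of the resulting query dict, with placeholder values:

```python
# Hypothetical shape of yt_query after the change above
yt_query = {
    'videoId': 'BaW_jenozKc',
    'params': '...',                                   # player_params, if configured
    'serviceIntegrityDimensions': {'poToken': 'XXX'},  # only when a PO Token is available
    # plus the playback context merged in by _generate_player_context(sts)
}
```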
@@ -3744,7 +3818,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _get_requested_clients(self, url, smuggled_data):
         requested_clients = []
-        broken_clients = []
         excluded_clients = []
         allowed_clients = sorted(
             (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
@@ -3758,12 +3831,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 excluded_clients.append(client[1:])
             elif client not in allowed_clients:
                 self.report_warning(f'Skipping unsupported client "{client}"')
-            elif client in self._BROKEN_CLIENTS.values():
-                broken_clients.append(client)
             else:
                 requested_clients.append(client)
-        # Force deprioritization of _BROKEN_CLIENTS for format de-duplication
-        requested_clients.extend(broken_clients)
         if not requested_clients:
             requested_clients.extend(self._DEFAULT_CLIENTS)
         for excluded_client in excluded_clients:
@@ -3788,19 +3857,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return pr_id
 
     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
-        initial_pr = ignore_initial_response = None
+        initial_pr = None
         if webpage:
-            if 'web' in clients:
-                experiments = traverse_obj(master_ytcfg, (
-                    'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
-                if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
-                    self.report_warning(
-                        'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
-                    ignore_initial_response = True
             initial_pr = self._search_json(
                 self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
 
         prs = []
+        deprioritized_prs = []
+
         if initial_pr and not self._invalid_player_response(initial_pr, video_id):
             # Android player_response does not have microFormats which are needed for
             # extraction of some data. So we return the initial_pr with formats
@@ -3822,14 +3886,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return
 
         tried_iframe_fallback = False
-        player_url = None
+        player_url = visitor_data = data_sync_id = None
         skipped_clients = {}
         while clients:
+            deprioritize_pr = False
             client, base_client, variant = _split_innertube_client(clients.pop())
-            player_ytcfg = {}
-            if client == 'web':
-                player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg
-            elif 'configs' not in self._configuration_arg('player_skip'):
+            player_ytcfg = master_ytcfg if client == 'web' else {}
+            if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
                 player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
 
             player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
@@ -3842,34 +3905,53 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 player_url = self._download_player_url(video_id)
                 tried_iframe_fallback = True
 
-            pr = initial_pr if client == 'web' and not ignore_initial_response else None
-            for retry in self.RetryManager(fatal=False):
-                try:
-                    pr = pr or self._extract_player_response(
-                        client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
-                        player_url if require_js_player else None, initial_pr, smuggled_data)
-                except ExtractorError as e:
-                    self.report_warning(e)
-                    break
-                experiments = traverse_obj(pr, (
-                    'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
-                    'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
-                if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
-                    pr = None
-                    retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
-            if not pr:
+            visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
+            data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
+            po_token = self.fetch_po_token(
+                client=client, visitor_data=visitor_data,
+                data_sync_id=data_sync_id if self.is_authenticated else None,
+                player_url=player_url if require_js_player else None,
+            )
+
+            require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
+            if not po_token and require_po_token:
+                self.report_warning(
+                    f'No PO Token provided for {client} client, '
+                    f'which is required for working {client} formats. '
+                    f'You can manually pass a PO Token for this client with '
+                    f'--extractor-args "youtube:po_token={client}+XXX"',
+                    only_once=True)
+                deprioritize_pr = True
+
+            pr = initial_pr if client == 'web' else None
+            try:
+                pr = pr or self._extract_player_response(
+                    client, video_id,
+                    master_ytcfg=player_ytcfg or master_ytcfg,
+                    player_ytcfg=player_ytcfg,
+                    player_url=player_url,
+                    initial_pr=initial_pr,
+                    visitor_data=visitor_data,
+                    data_sync_id=data_sync_id,
+                    po_token=po_token)
+            except ExtractorError as e:
+                self.report_warning(e)
                 continue
 
             if pr_id := self._invalid_player_response(pr, video_id):
                 skipped_clients[client] = pr_id
             elif pr:
                 # Save client name for introspection later
-                name = short_client_name(client)
                 sd = traverse_obj(pr, ('streamingData', {dict})) or {}
-                sd[STREAMING_DATA_CLIENT_NAME] = name
+                sd[STREAMING_DATA_CLIENT_NAME] = client
+                sd[STREAMING_DATA_PO_TOKEN] = po_token
                 for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
-                    f[STREAMING_DATA_CLIENT_NAME] = name
-                prs.append(pr)
+                    f[STREAMING_DATA_CLIENT_NAME] = client
+                    f[STREAMING_DATA_PO_TOKEN] = po_token
+                if deprioritize_pr:
+                    deprioritized_prs.append(pr)
+                else:
+                    prs.append(pr)
 
             # tv_embedded can work around age-gate and age-verification IF the video is embeddable
             if self._is_agegated(pr) and variant != 'tv_embedded':
@@ -3893,6 +3975,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 # _producer, _testsuite, & _vr variants can also work around age-verification
                 append_client('web_creator', 'mediaconnect')
 
+        prs.extend(deprioritized_prs)
+
         if skipped_clients:
             self.report_warning(
                 f'Skipping player responses from {"/".join(skipped_clients)} clients '
@@ -4027,13 +4111,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
 
                 client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
-                # _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
-                # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
-                is_broken = client_name in self._BROKEN_CLIENTS
+                po_token = fmt.get(STREAMING_DATA_PO_TOKEN)
+
+                if po_token:
+                    fmt_url = update_url_query(fmt_url, {'pot': po_token})
+
+                # Clients that require PO Token return videoplayback URLs that may return 403
+                is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN'))
                 if is_broken:
                     self.report_warning(
-                        f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken '
-                        'and may yield HTTP Error 403. They will be deprioritized', only_once=True)
+                        f'{video_id}: {client_name} client formats require a PO Token which was not provided. '
+                        'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
 
                 name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
                 fps = int_or_none(fmt.get('fps')) or 0
@@ -4109,12 +4197,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
                 skip_manifests.add('dash')
 
-        def process_manifest_format(f, proto, client_name, itag):
+        def process_manifest_format(f, proto, client_name, itag, po_token):
             key = (proto, f.get('language'))
             if not all_formats and key in itags[itag]:
                 return False
             itags[itag].add(key)
 
+            if f.get('source_preference') is None:
+                f['source_preference'] = -1
+
+            # Clients that require PO Token return videoplayback URLs that may return 403
+            # hls does not currently require PO Token
+            if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls':
+                self.report_warning(
+                    f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
+                    'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
+                f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ')
+                f['source_preference'] -= 20
+
             if itag and all_formats:
                 f['format_id'] = f'{itag}-{proto}'
             elif any(p != proto for p, _ in itags[itag]):
@@ -4126,9 +4226,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
                 f['language_preference'] = PREFERRED_LANG_VALUE
 
-            if f.get('source_preference') is None:
-                f['source_preference'] = -1
-
             if itag in ('616', '235'):
                 f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
                 f['source_preference'] += 100
@@ -4149,23 +4246,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         subtitles = {}
         for sd in streaming_data:
             client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
+            po_token = sd.get(STREAMING_DATA_PO_TOKEN)
             hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
             if hls_manifest_url:
+                if po_token:
+                    hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
                     hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
                 subtitles = self._merge_subtitles(subs, subtitles)
                 for f in fmts:
                     if process_manifest_format(f, 'hls', client_name, self._search_regex(
-                            r'/itag/(\d+)', f['url'], 'itag', default=None)):
+                            r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
                         yield f
 
             dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
             if dash_manifest_url:
+                if po_token:
+                    dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
                 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
                 subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
                 for f in formats:
-                    if process_manifest_format(f, 'dash', client_name, f['format_id']):
+                    if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
                         f['filesize'] = int_or_none(self._search_regex(
                             r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                         if needs_live_processing:

[diff truncated; remainder of the hunk not shown]
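To summarize how the token ends up on the media URLs in the hunks above: progressive/adaptive `videoplayback` URLs get a `pot` query parameter, while HLS/DASH manifest URLs get a trailing `/pot/<token>` path segment. A small illustrative snippet; the token and URLs are placeholders:

```python
from yt_dlp.utils import update_url_query

po_token = 'XXX'  # placeholder

# videoplayback URLs (formats/adaptiveFormats): attached as a query parameter
fmt_url = 'https://rr1---sn-example.googlevideo.com/videoplayback?itag=18'
fmt_url = update_url_query(fmt_url, {'pot': po_token})
# -> ...videoplayback?itag=18&pot=XXX

# HLS/DASH manifest URLs: attached as a trailing /pot/<token> path segment
hls_manifest_url = 'https://manifest.googlevideo.com/api/manifest/hls_variant/id/abc'
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
```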