From 283dca56feb9f23978733810ab155472d6473c38 Mon Sep 17 00:00:00 2001 From: dirkf Date: Fri, 7 Mar 2025 21:02:28 +0000 Subject: [PATCH] [YouTube] Initially support tce-style player JS * resolves #33079 --- test/test_youtube_signature.py | 21 +++++++++++++++++---- youtube_dl/extractor/youtube.py | 23 +++++++++++++---------- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 67ef75fde..166614e62 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -223,6 +223,18 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js', 'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg', ), + ( + 'https://www.youtube.com/s/player/f6e09c70/player_ias.vflset/en_US/base.js', + 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', + ), + ( + 'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js', + 'W9HJZKktxuYoDTqW', 'jHbbkcaxm54', + ), + ( + 'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js', + 'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO', + ), ] @@ -284,7 +296,7 @@ def t_factory(name, sig_func, url_pattern): def signature(jscode, sig_input): - func = YoutubeIE(FakeYDL())._parse_sig_js(jscode) + func = YoutubeIE(FakeYDL({'cachedir': False}))._parse_sig_js(jscode) src_sig = ( compat_str(string.printable[:sig_input]) if isinstance(sig_input, int) else sig_input) @@ -292,9 +304,10 @@ def signature(jscode, sig_input): def n_sig(jscode, sig_input): - funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) - return JSInterpreter(jscode).call_function( - funcname, sig_input, _ytdl_do_not_return=sig_input) + ie = YoutubeIE(FakeYDL({'cachedir': False})) + jsi = JSInterpreter(jscode) + jsi, _, func_code = ie._extract_n_function_code_jsi(sig_input, jsi) + return ie._extract_n_function_from_code(jsi, func_code)(sig_input) make_sig_test = t_factory( diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9e200105e..11bed6cae 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1607,16 +1607,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage or '', 'player URL', fatal=False) if player_url: ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},) - player_url = traverse_obj( + return traverse_obj( ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'), get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u)) - nplayer_url, is_tce = re.subn(r'(?<=/player_ias)_tce(?=\.vflset/)', '', player_url or '') - if is_tce: - # TODO: Add proper support for the 'tce' variant players - # See https://github.com/yt-dlp/yt-dlp/issues/12398 - self.write_debug('Modifying tce player URL: {0}'.format(player_url)) - return nplayer_url - return player_url def _download_player_url(self, video_id, fatal=False): res = self._download_webpage( @@ -1858,12 +1851,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if func_code: return jsi, player_id, func_code + return self._extract_n_function_code_jsi(video_id, jsi, player_id) - func_name = self._extract_n_function_name(jscode) + def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None): + + var_ay = self._search_regex( + r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])', + jsi.code, 'useful values', default='') + + func_name = self._extract_n_function_name(jsi.code) func_code = jsi.extract_function_code(func_name) + if var_ay: + func_code = (func_code[0], ';\n'.join((var_ay, func_code[1]))) - self.cache.store('youtube-nsig', player_id, func_code) + if player_id: + self.cache.store('youtube-nsig', player_id, func_code) return jsi, player_id, func_code def _extract_n_function_from_code(self, jsi, func_code):