fix ytInitialData parsing

This commit is contained in:
siikamiika 2020-08-11 00:05:32 +03:00
parent f0f76a33dc
commit eaedbfd97e
2 changed files with 10 additions and 3 deletions

View File

@@ -28,8 +28,14 @@ class YoutubeLiveChatReplayFD(FragmentFD):
return self._download_fragment(ctx, url, info_dict, headers) return self._download_fragment(ctx, url, info_dict, headers)
def parse_yt_initial_data(data): def parse_yt_initial_data(data):
raw_json = re.search(b'window\\["ytInitialData"\\]\\s*=\\s*(.*);', data).group(1) window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?);'
var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?);'
for patt in window_patt, var_patt:
try:
raw_json = re.search(patt, data).group(1)
return json.loads(raw_json) return json.loads(raw_json)
except AttributeError:
continue
self._prepare_and_start_frag_download(ctx) self._prepare_and_start_frag_download(ctx)

View File

@@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_yt_initial_data(self, video_id, webpage): def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex( config = self._search_regex(
r'window\["ytInitialData"\]\s*=\s*(.*);', (r'window\["ytInitialData"\]\s*=\s*(.*);',
r'var\s+ytInitialData\s*=\s*(.*?);'),
webpage, 'ytInitialData', default=None) webpage, 'ytInitialData', default=None)
if config: if config:
return self._parse_json( return self._parse_json(