[extractor] Add `_search_nextjs_data` (#1386)

Authored by: selfisekai
This commit is contained in:
Lauren Liberda 2021-10-23 04:02:23 +02:00 committed by pukkandan
parent c586f9e8de
commit f98709af31
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
1 changed file with 7 additions and 0 deletions

View File

@ -1496,6 +1496,13 @@ class InfoExtractor(object):
break break
return dict((k, v) for k, v in info.items() if v is not None) return dict((k, v) for k, v in info.items() if v is not None)
def _search_nextjs_data(self, webpage, video_id, **kw):
    """Extract and parse the Next.js ``__NEXT_DATA__`` JSON blob from a page.

    Finds the ``<script id="__NEXT_DATA__">`` element in *webpage* with
    ``self._search_regex`` and decodes its contents with
    ``self._parse_json``.

    Keyword arguments are routed to the helper that understands them
    instead of being passed blindly to both:

    * ``transform_source`` is forwarded only to ``self._parse_json``.
    * ``fatal`` is forwarded to both helpers.
    * everything else (e.g. ``default``, ``flags``, ``group``) is
      forwarded only to ``self._search_regex``.

    NOTE(review): the routing assumes the usual helper signatures
    (``_search_regex(pattern, string, name, default, fatal, flags, group)``
    and ``_parse_json(json_string, video_id, transform_source, fatal)``);
    confirm against the definitions in this class.

    Returns the decoded JSON, or None when the search produced None
    (presumably a non-fatal miss or ``default=None``).
    """
    # Split out the options meant for the JSON parser. Only keys the caller
    # actually supplied are forwarded, so a plain call still reaches
    # _parse_json with no extra keyword arguments.
    parse_kw = {}
    if 'transform_source' in kw:
        parse_kw['transform_source'] = kw.pop('transform_source')
    if 'fatal' in kw:
        # Shared option: stays in kw for the regex search as well.
        parse_kw['fatal'] = kw['fatal']

    json_string = self._search_regex(
        r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
        webpage, 'next.js data', **kw)

    # Nothing was found and the caller opted out of a hard failure: do not
    # hand None to the JSON parser, which cannot decode it.
    if json_string is None:
        return None

    return self._parse_json(json_string, video_id, **parse_kw)
@staticmethod @staticmethod
def _hidden_inputs(html): def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)