[YouTube] Add auto-translated subtitles

Closes #1245
This commit is contained in:
pukkandan 2021-10-12 12:03:56 +05:30
parent 7b38649845
commit ecdc9049c0
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
1 changed file with 26 additions and 23 deletions

View File

@@ -2964,15 +2964,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} }
pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict) pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
if pctr:
# Derive the subtitle language key for one caption-track dict.
# Prefers the track's 'vssId' (treated as '' when missing or falsy), passed
# through remove_start(..., '.') -- presumably stripping a leading '.';
# confirm against the helper -- with every remaining '.' replaced by '-'.
# If that yields an empty string, the track's 'languageCode' is returned
# instead (which may be None when the key is absent).
def get_lang_code(track):
return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
or track.get('languageCode'))
# Converted into dicts to remove duplicates # Converted into dicts to remove duplicates
captions = { captions = {
sub.get('baseUrl'): sub get_lang_code(sub): sub
for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])} for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
translation_languages = { translation_languages = {
lang.get('languageCode'): lang.get('languageName') lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])} for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
subtitles = {}
if pctr:
def process_language(container, base_url, lang_code, sub_name, query): def process_language(container, base_url, lang_code, sub_name, query):
lang_subs = container.setdefault(lang_code, []) lang_subs = container.setdefault(lang_code, [])
for fmt in self._SUBTITLE_FORMATS: for fmt in self._SUBTITLE_FORMATS:
@@ -2985,28 +2989,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'name': sub_name, 'name': sub_name,
}) })
for base_url, caption_track in captions.items(): subtitles, automatic_captions = {}, {}
for lang_code, caption_track in captions.items():
base_url = caption_track.get('baseUrl')
if not base_url: if not base_url:
continue continue
lang_name = self._get_text(caption_track, 'name', max_runs=1)
if caption_track.get('kind') != 'asr': if caption_track.get('kind') != 'asr':
lang_code = (
remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
or caption_track.get('languageCode'))
if not lang_code: if not lang_code:
continue continue
process_language( process_language(
subtitles, base_url, lang_code, subtitles, base_url, lang_code, lang_name, {})
traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False), if not caption_track.get('isTranslatable'):
{})
continue continue
automatic_captions = {}
for trans_code, trans_name in translation_languages.items(): for trans_code, trans_name in translation_languages.items():
if not trans_code: if not trans_code:
continue continue
if caption_track.get('kind') != 'asr':
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, template=' from %s')
process_language( process_language(
automatic_captions, base_url, trans_code, automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
self._get_text(trans_name, max_runs=1),
{'tlang': trans_code})
info['automatic_captions'] = automatic_captions info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles info['subtitles'] = subtitles
@@ -3054,7 +3057,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try: try:
# This will error if there is no livechat # This will error if there is no livechat
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
info['subtitles']['live_chat'] = [{ info.setdefault('subtitles', {})['live_chat'] = [{
'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
'video_id': video_id, 'video_id': video_id,
'ext': 'json', 'ext': 'json',