diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 3ca03fb6fb..86e20cb4be 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -64,8 +64,8 @@ class TestYoutubeSubtitles(BaseTestSubtitles): self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) - self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') - self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5') + self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b') + self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769') for lang in ['fr', 'de']: self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) @@ -73,13 +73,13 @@ class TestYoutubeSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'ttml' subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54') + self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2') def test_youtube_subtitles_vtt_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'vtt' subtitles = self.getSubtitles() - self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') + self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') def test_youtube_automatic_captions(self): self.url = '8YoUxe5ncPo' @@ -88,9 +88,15 @@ class TestYoutubeSubtitles(BaseTestSubtitles): subtitles = self.getSubtitles() self.assertTrue(subtitles['it'] is not None) + def test_youtube_no_automatic_captions(self): + self.url = 'QRS8MkLhQmM' + self.DL.params['writeautomaticsub'] = True + subtitles = self.getSubtitles() + self.assertTrue(not subtitles) + def test_youtube_translated_subtitles(self): # This video has a subtitles track, which can be translated - self.url = 'Ky9eprVWzlI' + self.url = 'i0ZabxXmH4Y' self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslangs'] = ['it'] subtitles = self.getSubtitles() diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 70a5bd3b0f..30a3e5c3c0 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -549,7 +549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'}, } - _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt', 'json3') _GEO_BYPASS = False @@ -1577,14 +1577,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_response, video_id, fatal=False) if player_response: renderer = player_response['captions']['playerCaptionsTracklistRenderer'] - base_url = renderer['captionTracks'][0]['baseUrl'] - sub_lang_list = [] - for lang in renderer['translationLanguages']: - lang_code = lang.get('languageCode') - if lang_code: - sub_lang_list.append(lang_code) - return make_captions(base_url, sub_lang_list) - + caption_tracks = renderer['captionTracks'] + for caption_track in caption_tracks: + if 'kind' not in caption_track: + # not an automatic transcription + continue + base_url = caption_track['baseUrl'] + sub_lang_list = [] + for lang in renderer['translationLanguages']: + lang_code = lang.get('languageCode') + if lang_code: + sub_lang_list.append(lang_code) + return make_captions(base_url, sub_lang_list) + + self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id) + return {} # Some videos don't provide ttsurl but rather caption_tracks and # caption_translation_languages (e.g. 20LmZk1hakA) # Does not used anymore as of 22.06.2017