[ie/ard:mediathek] Revert to using old id (#8916)

Authored by: Grub4K
This commit is contained in:
Simon Sawicki 2024-01-05 21:34:38 +01:00 committed by GitHub
parent ffbd4f2a02
commit b6951271ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 25 additions and 15 deletions

View File

@ -4,6 +4,7 @@ from functools import partial
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
OnDemandPagedList, OnDemandPagedList,
bug_reports_message,
determine_ext, determine_ext,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
@ -233,7 +234,7 @@ class ARDBetaMediathekIE(InfoExtractor):
(?:(?:beta|www)\.)?ardmediathek\.de/ (?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/]+/)? (?:[^/]+/)?
(?:player|live|video)/ (?:player|live|video)/
(?:(?P<display_id>[^?#]+)/)? (?:[^?#]+/)?
(?P<id>[a-zA-Z0-9]+) (?P<id>[a-zA-Z0-9]+)
/?(?:[?#]|$)''' /?(?:[?#]|$)'''
_GEO_COUNTRIES = ['DE'] _GEO_COUNTRIES = ['DE']
@ -242,8 +243,8 @@ class ARDBetaMediathekIE(InfoExtractor):
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', 'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
'info_dict': { 'info_dict': {
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen', 'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', 'id': '12939099',
'title': 'Liebe auf vier Pfoten', 'title': 'Liebe auf vier Pfoten',
'description': r're:^Claudia Schmitt, Anwältin in Salzburg', 'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
'duration': 5222, 'duration': 5222,
@ -255,7 +256,7 @@ class ARDBetaMediathekIE(InfoExtractor):
'series': 'Filme im MDR', 'series': 'Filme im MDR',
'age_limit': 0, 'age_limit': 0,
'channel': 'MDR', 'channel': 'MDR',
'_old_archive_ids': ['ardbetamediathek 12939099'], '_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
}, },
}, { }, {
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@ -276,37 +277,37 @@ class ARDBetaMediathekIE(InfoExtractor):
'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', 'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
'md5': '1e73ded21cb79bac065117e80c81dc88', 'md5': '1e73ded21cb79bac065117e80c81dc88',
'info_dict': { 'info_dict': {
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', 'id': '10049223',
'ext': 'mp4', 'ext': 'mp4',
'title': 'tagesschau, 20:00 Uhr', 'title': 'tagesschau, 20:00 Uhr',
'timestamp': 1636398000, 'timestamp': 1636398000,
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
'upload_date': '20211108', 'upload_date': '20211108',
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste', 'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
'duration': 915, 'duration': 915,
'episode': 'tagesschau, 20:00 Uhr', 'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau', 'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
'channel': 'ARD-Aktuell', 'channel': 'ARD-Aktuell',
'_old_archive_ids': ['ardbetamediathek 10049223'], '_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
}, },
}, { }, {
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', 'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'md5': 'c428b9effff18ff624d4f903bda26315', 'md5': 'c428b9effff18ff624d4f903bda26315',
'info_dict': { 'info_dict': {
'id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', 'id': '94834686',
'ext': 'mp4', 'ext': 'mp4',
'duration': 2700, 'duration': 2700,
'episode': '7 Tage ... unter harten Jungs', 'episode': '7 Tage ... unter harten Jungs',
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072', 'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
'upload_date': '20231005', 'upload_date': '20231005',
'timestamp': 1696491171, 'timestamp': 1696491171,
'display_id': '7-tage/7-tage-unter-harten-jungs/hr-fernsehen', 'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'series': '7 Tage ...', 'series': '7 Tage ...',
'channel': 'HR', 'channel': 'HR',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
'title': '7 Tage ... unter harten Jungs', 'title': '7 Tage ... unter harten Jungs',
'_old_archive_ids': ['ardbetamediathek 94834686'], '_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
}, },
}, { }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -357,14 +358,25 @@ class ARDBetaMediathekIE(InfoExtractor):
}), get_all=False) }), get_all=False)
def _real_extract(self, url): def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id') display_id = self._match_id(url)
page_data = self._download_json( page_data = self._download_json(
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}', video_id, query={ f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={
'embedded': 'false', 'embedded': 'false',
'mcV6': 'true', 'mcV6': 'true',
}) })
# For user convenience we use the old contentId instead of the longer crid
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
if old_id is not None:
video_id = str(old_id)
archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
else:
self.report_warning(f'Could not extract contentId{bug_reports_message()}')
video_id = display_id
archive_ids = None
player_data = traverse_obj( player_data = traverse_obj(
page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False) page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False)
is_live = player_data.get('type') == 'player_live' is_live = player_data.get('type') == 'player_live'
@ -419,8 +431,6 @@ class ARDBetaMediathekIE(InfoExtractor):
}) })
age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none})) age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId'))
return { return {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
@ -438,7 +448,7 @@ class ARDBetaMediathekIE(InfoExtractor):
'channel': 'clipSourceName', 'channel': 'clipSourceName',
})), })),
**self._extract_episode_info(page_data.get('title')), **self._extract_episode_info(page_data.get('title')),
'_old_archive_ids': [make_archive_id(ARDBetaMediathekIE, old_id)], '_old_archive_ids': archive_ids,
} }