yt-dlp/yt_dlp/extractor/atvat.py

import datetime as dt

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    jwt_encode_hs256,
    try_get,
)


class ATVAtIE(InfoExtractor):
    """Extractor for episode pages on atv.at.

    The episode page embeds a JSON state blob whose ``contentResource`` list
    describes the clips of the episode.  Stream URLs for these clips are
    requested from the ``getsources`` endpoint with a short-lived, HS256-signed
    JWT, and the result is returned as a ``multi_video`` with one entry per
    clip.
    """

    _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'

    _TESTS = [{
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',
        'md5': '3c3b4aaca9f63e32b35e04a9c2515903',
        'info_dict': {
            'id': 'v-ce9cgn1e70n5-1',
            'ext': 'mp4',
            'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
        },
    }, {
        'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
        'only_matching': True,
    }]

    # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)
    _ACCESS_ID = 'x_atv'
    _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'

    def _extract_video_info(self, url, content, video):
        """Build the info dict for a single clip.

        ``content`` is the clip's entry from the page's ``contentResource``
        list and ``video`` is the matching entry of the ``getsources``
        response.  ``url`` is kept for interface compatibility; it is not used.

        Returns a dict with ``id``, ``title``, ``duration``, ``series`` and
        ``formats``.  Raises ``KeyError`` if ``video`` has no ``urls`` or if
        ``content`` has neither ``splitId`` nor ``id``.
        """
        # Only read 'id' when 'splitId' is absent, so that a clip carrying a
        # 'splitId' but no 'id' does not fail with a needless KeyError.
        clip_id = content['splitId'] if 'splitId' in content else content['id']
        formats = []
        clip_urls = video['urls']
        for protocol, variant in clip_urls.items():
            # Only the unencrypted ('clear') variant of each protocol is used.
            source_url = try_get(variant, lambda x: x['clear']['url'])
            if not source_url:
                continue
            if protocol == 'dash':
                formats.extend(self._extract_mpd_formats(
                    source_url, clip_id, mpd_id=protocol, fatal=False))
            elif protocol == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    source_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id=protocol, fatal=False))
            else:
                # Unknown protocol: pass the URL through as a plain format.
                formats.append({
                    'url': source_url,
                    'format_id': protocol,
                })

        return {
            'id': clip_id,
            'title': content.get('title'),
            'duration': float_or_none(content.get('duration')),
            'series': content.get('tvShowTitle'),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract all clips of the episode at ``url`` as a ``multi_video``.

        Raises a geo-restriction error when the API reports
        ``'Geo check failed'`` and ``ExtractorError`` for any other error
        title reported by the API.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Non-greedy capture: stop at the first closing tag so that further
        # <script> elements on the same line do not end up in the JSON text.
        json_data = self._parse_json(
            self._search_regex(r'<script id="state" type="text/plain">(.*?)</script>', webpage, 'json_data'),
            video_id=video_id)

        video_title = json_data['views']['default']['page']['title']
        content_resource = json_data['views']['default']['page']['contentResource']
        content_id = content_resource[0]['id']
        # Each clip is identified by its index in content_resource; the code
        # below indexes content_resource with the 'id' of each returned video,
        # so the response is expected to carry that same index.
        content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
                       for id_, content in enumerate(content_resource)]

        # The JWT time claims are epoch seconds.  Use an aware UTC datetime:
        # timedelta arithmetic on a naive local datetime is wall-clock
        # arithmetic and can land in a DST gap/fold, skewing nbf/exp by up to
        # an hour and thereby invalidating the token.
        time_of_request = dt.datetime.now(dt.timezone.utc)
        not_before = time_of_request - dt.timedelta(minutes=5)
        expire = time_of_request + dt.timedelta(minutes=5)
        payload = {
            'content_ids': {
                content_id: content_ids,
            },
            'secure_delivery': True,
            'iat': int(time_of_request.timestamp()),
            'nbf': int(not_before.timestamp()),
            'exp': int(expire.timestamp()),
        }
        jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})
        videos = self._download_json(
            'https://vas-v4.p7s1video.net/4.0/getsources',
            content_id, 'Downloading videos JSON', query={
                # The token is decoded here, i.e. the encoder is expected to
                # return bytes.
                'token': jwt_token.decode('utf-8'),
            })

        # Only the first item of the response's 'data' mapping is used.
        video_id, videos_data = next(iter(videos['data'].items()))
        # try_get yields None when videos_data has no ['error']['title'].
        error_msg = try_get(videos_data, lambda x: x['error']['title'])
        if error_msg == 'Geo check failed':
            self.raise_geo_restricted(error_msg)
        elif error_msg:
            raise ExtractorError(error_msg)
        entries = [
            self._extract_video_info(url, content_resource[video['id']], video)
            for video in videos_data]

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }
[cleanup] Standardize `import datetime as dt` (#8978) 2024-02-25 01:16:34 +01:00			`import datetime as dt`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`from .common import InfoExtractor`
			`from ..utils import (`
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409) Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> 2024-05-26 21:27:21 +02:00			`ExtractorError,`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`float_or_none,`
			`jwt_encode_hs256,`
			`try_get,`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`)`


			`class ATVAtIE(InfoExtractor):`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`_VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)'`

[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`_TESTS = [{`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen',`
			`'md5': '3c3b4aaca9f63e32b35e04a9c2515903',`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`'info_dict': {`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`'id': 'v-ce9cgn1e70n5-1',`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`'ext': 'mp4',`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`},`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`}, {`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`'only_matching': True,`
			`}]`

[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`# extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger)`
			`_ACCESS_ID = 'x_atv'`
			`_ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia'`
[ATV.at] Fix extractor for ATV.at (#816) Authored-by: NeroBurner, coletdjnz Fixes https://github.com/ytdl-org/youtube-dl/issues/29079 2021-08-29 23:34:39 +02:00
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`def _extract_video_info(self, url, content, video):`
			`clip_id = content.get('splitId', content['id'])`
[ATV.at] Fix extractor for ATV.at (#816) Authored-by: NeroBurner, coletdjnz Fixes https://github.com/ytdl-org/youtube-dl/issues/29079 2021-08-29 23:34:39 +02:00			`formats = []`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`clip_urls = video['urls']`
			`for protocol, variant in clip_urls.items():`
			`source_url = try_get(variant, lambda x: x['clear']['url'])`
			`if not source_url:`
			`continue`
			`if protocol == 'dash':`
			`formats.extend(self._extract_mpd_formats(`
			`source_url, clip_id, mpd_id=protocol, fatal=False))`
			`elif protocol == 'hls':`
			`formats.extend(self._extract_m3u8_formats(`
			`source_url, clip_id, 'mp4', 'm3u8_native',`
			`m3u8_id=protocol, fatal=False))`
			`else:`
			`formats.append({`
			`'url': source_url,`
			`'format_id': protocol,`
			`})`

[ATV.at] Fix extractor for ATV.at (#816) Authored-by: NeroBurner, coletdjnz Fixes https://github.com/ytdl-org/youtube-dl/issues/29079 2021-08-29 23:34:39 +02:00			`return {`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`'id': clip_id,`
			`'title': content.get('title'),`
			`'duration': float_or_none(content.get('duration')),`
			`'series': content.get('tvShowTitle'),`
			`'formats': formats,`
[ATV.at] Fix extractor for ATV.at (#816) Authored-by: NeroBurner, coletdjnz Fixes https://github.com/ytdl-org/youtube-dl/issues/29079 2021-08-29 23:34:39 +02:00			`}`

[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`def _real_extract(self, url):`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`video_id = self._match_id(url)`
			`webpage = self._download_webpage(url, video_id)`
			`json_data = self._parse_json(`
			`self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'),`
			`video_id=video_id)`

			`video_title = json_data['views']['default']['page']['title']`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`content_resource = json_data['views']['default']['page']['contentResource']`
			`content_id = content_resource[0]['id']`
			`content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}`
			`for id_, content in enumerate(content_resource)]`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00
[cleanup] Standardize `import datetime as dt` (#8978) 2024-02-25 01:16:34 +01:00			`time_of_request = dt.datetime.now()`
			`not_before = time_of_request - dt.timedelta(minutes=5)`
			`expire = time_of_request + dt.timedelta(minutes=5)`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`payload = {`
			`'content_ids': {`
			`content_id: content_ids,`
			`},`
			`'secure_delivery': True,`
			`'iat': int(time_of_request.timestamp()),`
			`'nbf': int(not_before.timestamp()),`
			`'exp': int(expire.timestamp()),`
			`}`
			`jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID})`
			`videos = self._download_json(`
			`'https://vas-v4.p7s1video.net/4.0/getsources',`
			`content_id, 'Downloading videos JSON', query={`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`'token': jwt_token.decode('utf-8'),`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`})`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`video_id, videos_data = next(iter(videos['data'].items()))`
[ATVAt] Detect geo-restriction (#2777) Authored by: marieell 2022-02-15 10:16:49 +01:00			`error_msg = try_get(videos_data, lambda x: x['error']['title'])`
			`if error_msg == 'Geo check failed':`
			`self.raise_geo_restricted(error_msg)`
			`elif error_msg:`
			`raise ExtractorError(error_msg)`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`entries = [`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`self._extract_video_info(url, content_resource[video['id']], video)`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`for video in videos_data]`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00
			`return {`
			`'_type': 'multi_video',`
			`'id': video_id,`
			`'title': video_title,`
[atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner 2021-09-23 19:40:51 +02:00			`'entries': entries,`
[atvat] Add new extractor(closes #5325) 2017-03-25 18:13:13 +01:00			`}`