yt-dlp/yt_dlp/extractor/tv4.py

import re

from .common import InfoExtractor
from ..utils import (
    bool_or_none,
    int_or_none,
    parse_iso8601,
    traverse_obj,
    url_or_none,
)


class TV4IE(InfoExtractor):
    """Extractor for tv4.se clip pages and tv4play.se video pages.

    The numeric video ID is captured from the URL. Metadata is requested from
    the ``asset`` endpoint and the manifest URL from the ``play`` endpoint of
    ``playback2.a2d.tv``; HLS, DASH, HDS and MSS variants are then probed from
    that single manifest URL.
    """

    IE_DESC = 'tv4.se and tv4play.se'
    # Verbose-mode pattern. The last alternative inside the tv4play.se group
    # is empty, so ``tv4play.se/<digits>`` is accepted as well.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?:
            tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
            tv4play\.se/
            (?:
                (?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
                iframe/video/|
                film/|
                sport/|
            )
        )(?P<id>[0-9]+)'''
    # NOTE(review): presumably switches off the base class's automatic
    # geo-bypass handling - confirm against InfoExtractor.
    _GEO_BYPASS = False
    _TESTS = [
        {
            # not geo-restricted
            'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
            'md5': 'cb837212f342d77cec06e6dad190e96d',
            'info_dict': {
                'id': '2491650',
                'ext': 'mp4',
                'title': 'Kalla Fakta 5 (english subtitles)',
                'description': '2491650',
                'series': 'Kalla fakta',
                'duration': 1335,
                'thumbnail': r're:^https?://[^/?#]+/api/v2/img/',
                'timestamp': 1385373240,
                'upload_date': '20131125',
            },
            'params': {'skip_download': 'm3u8'},
            'expected_warnings': ['Unable to download f4m manifest'],
        },
        {
            'url': 'http://www.tv4play.se/iframe/video/3054113',
            'md5': 'cb837212f342d77cec06e6dad190e96d',
            'info_dict': {
                'id': '3054113',
                'ext': 'mp4',
                'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
                'thumbnail': r're:^https?://.*\.jpg$',
                'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
                'timestamp': int,
                'upload_date': '20150130',
            },
            'skip': '404 Not Found',
        },
        {
            'url': 'http://www.tv4play.se/sport/3060959',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/film/2378136',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
            'only_matching': True,
        },
        {
            'url': 'http://www.tv4play.se/program/farang/3922081',
            'only_matching': True,
        },
        {
            'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
            'only_matching': True,
        },
    ]

    def _call_api(self, endpoint, video_id, headers=None, query=None):
        """Download and return the JSON of a playback2.a2d.tv endpoint.

        The request goes to ``https://playback2.a2d.tv/{endpoint}/{video_id}``
        with the default query parameters ``service=tv4``, ``device=browser``
        and ``protocol=hls``. Entries of ``query`` are merged on top of these
        defaults and win when a key is present in both.

        endpoint -- API path segment (this class uses 'asset' and 'play')
        video_id -- video ID; part of the request URL and also handed to
                    ``_download_json`` as its video ID argument
        headers  -- optional HTTP headers forwarded to ``_download_json``
        query    -- optional mapping of additional/overriding query parameters

        Returns whatever ``_download_json`` returns for the request.
        """
        return self._download_json(
            f'https://playback2.a2d.tv/{endpoint}/{video_id}', video_id,
            f'Downloading {endpoint} API JSON', headers=headers, query={
                'service': 'tv4',
                'device': 'browser',
                'protocol': 'hls',
                # ``query`` defaults to None rather than a shared mutable
                # dict, so fall back to an empty mapping here.
                **(query or {}),
            })

    def _real_extract(self, url):
        """Build the info dict for a single tv4.se / tv4play.se video URL.

        Raises a geo-restriction error (via ``raise_geo_restricted``) only
        when no format could be extracted and the metadata carries a truthy
        ``is_geo_restricted`` entry.
        """
        video_id = self._match_id(url)

        # Metadata lives under the 'metadata' key of the asset response; fall
        # back to an empty dict so the lookups below stay safe.
        info = traverse_obj(self._call_api('asset', video_id, query={
            'protocol': 'hls,dash',
            'drm': 'widevine',
        }), ('metadata', {dict})) or {}

        # The play endpoint is called with the geo verification headers.
        manifest_url = self._call_api(
            'play', video_id, headers=self.geo_verification_headers())['playbackItem']['manifestUrl']

        formats, subtitles = [], {}

        # Every manifest flavour is requested with fatal=False, so a missing
        # variant does not abort the extraction of the others.
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            manifest_url, video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        formats.extend(fmts)
        subtitles = self._merge_subtitles(subtitles, subs)

        # DASH and HDS URLs are derived by swapping the '.m3u8' extension.
        fmts, subs = self._extract_mpd_formats_and_subtitles(
            manifest_url.replace('.m3u8', '.mpd'),
            video_id, mpd_id='dash', fatal=False)
        formats.extend(fmts)
        subtitles = self._merge_subtitles(subtitles, subs)

        # The f4m helper used here returns formats only (no subtitles).
        fmts = self._extract_f4m_formats(
            manifest_url.replace('.m3u8', '.f4m'),
            video_id, f4m_id='hds', fatal=False)
        formats.extend(fmts)

        # For MSS, everything after '.ism/' up to '.m3u8' becomes 'Manifest'.
        fmts, subs = self._extract_ism_formats_and_subtitles(
            re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
            video_id, ism_id='mss', fatal=False)
        formats.extend(fmts)
        subtitles = self._merge_subtitles(subtitles, subs)

        if not formats and info.get('is_geo_restricted'):
            self.raise_geo_restricted(
                'This video is not available from your location due to geo-restriction, or not being authenticated',
                countries=['SE'])

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(info, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                # Both the snake_case and the camelCase key are looked up.
                'timestamp': (('broadcast_date_time', 'broadcastDateTime'), {parse_iso8601}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('image', {url_or_none}),
                'is_live': ('isLive', {bool_or_none}),
                'series': ('seriesTitle', {str}),
                'season_number': ('seasonNumber', {int_or_none}),
                'episode': ('episodeTitle', {str}),
                'episode_number': ('episodeNumber', {int_or_none}),
            }, get_all=False),
        }
[tv4] fix format extraction(closes #16650) 2018-06-06 01:41:08 +02:00			`import re`

[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`from .common import InfoExtractor`
			`from ..utils import (`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`bool_or_none,`
[tv4] Fix hls and hds formats (Closes #10659) 2016-09-15 19:54:34 +02:00			`int_or_none,`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`parse_iso8601,`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`traverse_obj,`
			`url_or_none,`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`)`


			`class TV4IE(InfoExtractor):`
			`IE_DESC = 'tv4.se and tv4play.se'`
			`_VALID_URL = r'''(?x)https?://(?:www\.)?`
			`(?:`
			`tv4\.se/(?:[^/]+)/klipp/(?:.*)-\|`
			`tv4play\.se/`
			`(?:`
Update to ytdl-2021.02.04.1 except youtube 2021-02-04 08:56:01 +01:00			`(?:program\|barn)/(?:(?:[^/]+/){1,2}\|(?:[^\?]+)\?video_id=)\|`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`iframe/video/\|`
			`film/\|`
			`sport/\|`
			`)`
			`)(?P<id>[0-9]+)'''`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`_GEO_BYPASS = False`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`_TESTS = [`
			`{`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`# not geo-restricted`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',`
[tv4] improve extraction(closes #11698) - remove check for requires_subscription - extract more formats - extract subtitles 2017-01-13 10:19:53 +01:00			`'md5': 'cb837212f342d77cec06e6dad190e96d',`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`'info_dict': {`
			`'id': '2491650',`
			`'ext': 'mp4',`
			`'title': 'Kalla Fakta 5 (english subtitles)',`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`'description': '2491650',`
			`'series': 'Kalla fakta',`
			`'duration': 1335,`
			`'thumbnail': r're:^https?://[^/?#]+/api/v2/img/',`
			`'timestamp': 1385373240,`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`'upload_date': '20131125',`
			`},`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`'params': {'skip_download': 'm3u8'},`
			`'expected_warnings': ['Unable to download f4m manifest'],`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`},`
			`{`
			`'url': 'http://www.tv4play.se/iframe/video/3054113',`
[tv4] improve extraction(closes #11698) - remove check for requires_subscription - extract more formats - extract subtitles 2017-01-13 10:19:53 +01:00			`'md5': 'cb837212f342d77cec06e6dad190e96d',`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`'info_dict': {`
			`'id': '3054113',`
			`'ext': 'mp4',`
			`'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 13:08:07 +01:00			`'thumbnail': r're:^https?://.*\.jpg$',`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',`
			`'timestamp': int,`
			`'upload_date': '20150130',`
			`},`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`'skip': '404 Not Found',`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`},`
			`{`
			`'url': 'http://www.tv4play.se/sport/3060959',`
			`'only_matching': True,`
			`},`
			`{`
			`'url': 'http://www.tv4play.se/film/2378136',`
			`'only_matching': True,`
			`},`
			`{`
			`'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',`
			`'only_matching': True,`
			`},`
[tv4] Relax _VALID_URL (closes #14206) 2017-09-14 18:50:19 +02:00			`{`
[tv4] fix a test URL 2017-09-14 20:47:23 +02:00			`'url': 'http://www.tv4play.se/program/farang/3922081',`
[tv4] Relax _VALID_URL (closes #14206) 2017-09-14 18:50:19 +02:00			`'only_matching': True,`
Update to ytdl-2021.02.04.1 except youtube 2021-02-04 08:56:01 +01:00			`},`
			`{`
			`'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',`
			`'only_matching': True,`
[tv4] Relax _VALID_URL (closes #14206) 2017-09-14 18:50:19 +02:00			`}`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`]`

[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`def _call_api(self, endpoint, video_id, headers=None, query={}):`
			`return self._download_json(`
			`f'https://playback2.a2d.tv/{endpoint}/{video_id}', video_id,`
			`f'Downloading {endpoint} API JSON', headers=headers, query={`
			`'service': 'tv4',`
			`'device': 'browser',`
			`'protocol': 'hls',`
			`**query,`
			`})`

[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`

[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`info = traverse_obj(self._call_api('asset', video_id, query={`
			`'protocol': 'hls,dash',`
			`'drm': 'widevine',`
			`}), ('metadata', {dict})) or {}`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`manifest_url = self._call_api(`
			`'play', video_id, headers=self.geo_verification_headers())['playbackItem']['manifestUrl']`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`formats, subtitles = [], {}`
[tv4] Extract subtitles from streaming manifests 2021-04-15 10:19:32 +02:00
			`fmts, subs = self._extract_m3u8_formats_and_subtitles(`
[tv4] fix format extraction(closes #16650) 2018-06-06 01:41:08 +02:00			`manifest_url, video_id, 'mp4',`
			`'m3u8_native', m3u8_id='hls', fatal=False)`
[tv4] Extract subtitles from streaming manifests 2021-04-15 10:19:32 +02:00			`formats.extend(fmts)`
			`subtitles = self._merge_subtitles(subtitles, subs)`

			`fmts, subs = self._extract_mpd_formats_and_subtitles(`
[tv4] fix format extraction(closes #16650) 2018-06-06 01:41:08 +02:00			`manifest_url.replace('.m3u8', '.mpd'),`
[tv4] Extract subtitles from streaming manifests 2021-04-15 10:19:32 +02:00			`video_id, mpd_id='dash', fatal=False)`
			`formats.extend(fmts)`
			`subtitles = self._merge_subtitles(subtitles, subs)`

			`fmts = self._extract_f4m_formats(`
[tv4] fix format extraction(closes #16650) 2018-06-06 01:41:08 +02:00			`manifest_url.replace('.m3u8', '.f4m'),`
[tv4] Extract subtitles from streaming manifests 2021-04-15 10:19:32 +02:00			`video_id, f4m_id='hds', fatal=False)`
			`formats.extend(fmts)`

			`fmts, subs = self._extract_ism_formats_and_subtitles(`
[tv4] Fix ISM formats extraction (closes #24667) 2020-04-07 17:55:36 +02:00			`re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),`
[tv4] Extract subtitles from streaming manifests 2021-04-15 10:19:32 +02:00			`video_id, ism_id='mss', fatal=False)`
			`formats.extend(fmts)`
			`subtitles = self._merge_subtitles(subtitles, subs)`
[tv4] Bypass geo restriction and improve detection 2017-02-19 00:24:38 +01:00
			`if not formats and info.get('is_geo_restricted'):`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`self.raise_geo_restricted(`
			`'This video is not available from your location due to geo-restriction, or not being authenticated',`
			`countries=['SE'])`
[tv4] Bypass geo restriction and improve detection 2017-02-19 00:24:38 +01:00
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`return {`
			`'id': video_id,`
			`'formats': formats,`
[tv4] Extract subtitles from streaming manifests 2021-04-15 10:19:32 +02:00			`'subtitles': subtitles,`
[extractor/tv4] Fix extractor (#5649) Closes #5535 Authored by: TxI5, dirkf 2023-06-15 19:57:25 +02:00			`**traverse_obj(info, {`
			`'title': ('title', {str}),`
			`'description': ('description', {str}),`
			`'timestamp': (('broadcast_date_time', 'broadcastDateTime'), {parse_iso8601}),`
			`'duration': ('duration', {int_or_none}),`
			`'thumbnail': ('image', {url_or_none}),`
			`'is_live': ('isLive', {bool_or_none}),`
			`'series': ('seriesTitle', {str}),`
			`'season_number': ('seasonNumber', {int_or_none}),`
			`'episode': ('episodeTitle', {str}),`
			`'episode_number': ('episodeNumber', {int_or_none}),`
			`}, get_all=False),`
[tv4] Add new extractor (Closes #4839) (Closes #2103) 2015-02-20 13:13:41 +01:00			`}`