yt-dlp/yt_dlp/extractor/rtvnh.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import ExtractorError


class RTVNHIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.rtvnh.nl/video/131946',
        'md5': 'cdbec9f44550763c8afc96050fa747dc',
        'info_dict': {
            'id': '131946',
            'ext': 'mp4',
            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
            'thumbnail': r're:^https?:.*\.jpg$'
        }
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        meta = self._parse_json(self._download_webpage(
            'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)

        status = meta.get('status')
        if status != 200:
            raise ExtractorError(
                '%s returned error code %d' % (self.IE_NAME, status), expected=True)

        formats = []
        rtmp_formats = self._extract_smil_formats(
            'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
        formats.extend(rtmp_formats)

        for rtmp_format in rtmp_formats:
            rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
            rtsp_format = rtmp_format.copy()
            del rtsp_format['play_path']
            del rtsp_format['ext']
            rtsp_format.update({
                'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
                'url': rtmp_url.replace('rtmp://', 'rtsp://'),
                'protocol': 'rtsp',
            })
            formats.append(rtsp_format)
            http_base_url = rtmp_url.replace('rtmp://', 'http://')
            formats.extend(self._extract_m3u8_formats(
                http_base_url + '/playlist.m3u8', video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
            formats.extend(self._extract_f4m_formats(
                http_base_url + '/manifest.f4m',
                video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta['title'].strip(),
            'thumbnail': meta.get('image'),
            'formats': formats
        }
[rtvnhnl] Added new extractor 2015-08-12 18:07:27 +02:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[rtvnh] Check status code 2015-08-12 21:11:55 +02:00			`from ..utils import ExtractorError`
[rtvnhnl] Added new extractor 2015-08-12 18:07:27 +02:00

[rtvnh] Renamed rtvnhnl -> rtvnh 2015-08-12 20:26:13 +02:00			`class RTVNHIE(InfoExtractor):`
[rtvnhnl] Added new extractor 2015-08-12 18:07:27 +02:00			`_VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'`
			`_TEST = {`
			`'url': 'http://www.rtvnh.nl/video/131946',`
[rtvnh] extract all formats 2016-07-05 15:45:39 +02:00			`'md5': 'cdbec9f44550763c8afc96050fa747dc',`
[rtvnhnl] Added new extractor 2015-08-12 18:07:27 +02:00			`'info_dict': {`
			`'id': '131946',`
			`'ext': 'mp4',`
			`'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 13:08:07 +01:00			`'thumbnail': r're:^https?:.*\.jpg$'`
[rtvnhnl] Added new extractor 2015-08-12 18:07:27 +02:00			`}`
			`}`

			`def _real_extract(self, url):`
			`video_id = self._match_id(url)`
[rtvnh] Carry long lines 2015-08-12 21:00:05 +02:00
			`meta = self._parse_json(self._download_webpage(`
			`'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)`
[rtvnh] Check status code 2015-08-12 21:11:55 +02:00
			`status = meta.get('status')`
			`if status != 200:`
			`raise ExtractorError(`
			`'%s returned error code %d' % (self.IE_NAME, status), expected=True)`

[rtvnh] extract all formats 2016-07-05 15:45:39 +02:00			`formats = []`
			`rtmp_formats = self._extract_smil_formats(`
			`'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)`
			`formats.extend(rtmp_formats)`

			`for rtmp_format in rtmp_formats:`
			`rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])`
			`rtsp_format = rtmp_format.copy()`
			`del rtsp_format['play_path']`
			`del rtsp_format['ext']`
			`rtsp_format.update({`
			`'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),`
			`'url': rtmp_url.replace('rtmp://', 'rtsp://'),`
			`'protocol': 'rtsp',`
			`})`
			`formats.append(rtsp_format)`
			`http_base_url = rtmp_url.replace('rtmp://', 'http://')`
			`formats.extend(self._extract_m3u8_formats(`
			`http_base_url + '/playlist.m3u8', video_id, 'mp4',`
			`'m3u8_native', m3u8_id='hls', fatal=False))`
			`formats.extend(self._extract_f4m_formats(`
			`http_base_url + '/manifest.f4m',`
			`video_id, f4m_id='hds', fatal=False))`
Remove _sort_formats from _extract_*_formats methods Now _sort_formats should be called explicitly. _sort_formats has been added to all the necessary places in code. Closes #8051 2016-03-27 03:03:08 +02:00			`self._sort_formats(formats)`
[rtvnh] Check status code 2015-08-12 21:11:55 +02:00
[rtvnhnl] Added new extractor 2015-08-12 18:07:27 +02:00			`return {`
			`'id': video_id,`
			`'title': meta['title'].strip(),`
[rtvnh] Make thumbnail optional 2015-08-12 21:00:25 +02:00			`'thumbnail': meta.get('image'),`
[rtvnhnl] Added new extractor 2015-08-12 18:07:27 +02:00			`'formats': formats`
			`}`