]+data-mychannels-type="video"[^>]*>)', webpage):
+ mychannels_id = extract_attributes(element).get('data-mychannels-id')
+ if mychannels_id:
+ entries.append('https://mychannels.video/embed/' + mychannels_id)
+ return entries
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id, site_id = mobj.group('id', 'site_id')
+ production_id = self._match_id(url)
+ production = self._download_json(
+ 'https://embed.mychannels.video/sdk/production/' + production_id,
+ production_id, query={'options': 'UUUU_default'})['productions'][0]
+ title = production['title']
- webpage = self._download_webpage(url, video_id)
-
- config = self._parse_json(
- self._search_regex(
- r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
- webpage, 'config', default='{}'), video_id,
- transform_source=lambda s: s.replace(
- '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
-
- vod_id = config.get('vodId') or self._search_regex(
- (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
- r'"vodId"\s*:\s*"(.+?)"',
- r'<[^>]+id=["\']vod-(\d+)'),
- webpage, 'video_id', default=None)
-
- # clip, no authentication required
- if not vod_id:
- player = self._parse_json(
- self._search_regex(
- r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
- default=''),
- video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
- if player:
- video = player[-1]
- if video['videoUrl'] in ('http', 'https'):
- return self.url_result(video['url'], MedialaanIE.ie_key())
- info = {
- 'id': video_id,
- 'url': video['videoUrl'],
- 'title': video['title'],
- 'thumbnail': video.get('imageUrl'),
- 'timestamp': int_or_none(video.get('createdDate')),
- 'duration': int_or_none(video.get('duration')),
- }
+ formats = []
+ for source in (production.get('sources') or []):
+ src = source.get('src')
+ if not src:
+ continue
+ ext = mimetype2ext(source.get('type'))
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, production_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
else:
- info = self._parse_html5_media_entries(
- url, webpage, video_id, m3u8_id='hls')[0]
- info.update({
- 'id': video_id,
- 'title': self._html_search_meta('description', webpage),
- 'duration': parse_duration(self._html_search_meta('duration', webpage)),
+ formats.append({
+ 'ext': ext,
+ 'url': src,
})
- # vod, authentication required
- else:
- if not self._logged_in:
- self._login()
+ self._sort_formats(formats)
- settings = self._parse_json(
- self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings', default='{}'),
- video_id)
-
- def get(container, item):
- return try_get(
- settings, lambda x: x[container][item],
- compat_str) or self._search_regex(
- r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
- default=None)
-
- app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
- sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
-
- data = self._download_json(
- 'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
- video_id, query={
- 'app_id': app_id,
- 'user_network': sso,
- 'UID': self._uid,
- 'UIDSignature': self._uid_signature,
- 'signatureTimestamp': self._signature_timestamp,
- })
-
- formats = self._extract_m3u8_formats(
- data['response']['uri'], video_id, entry_protocol='m3u8_native',
- ext='mp4', m3u8_id='hls')
-
- self._sort_formats(formats)
-
- info = {
- 'id': vod_id,
- 'formats': formats,
- }
-
- api_key = get('vod', 'apiKey')
- channel = get('medialaanGigya', 'channel')
-
- if api_key:
- videos = self._download_json(
- 'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
- query={
- 'channels': channel,
- 'ids': vod_id,
- 'limit': 1,
- 'apikey': api_key,
- })
- if videos:
- video = try_get(
- videos, lambda x: x['response']['videos'][0], dict)
- if video:
- def get(container, item, expected_type=None):
- return try_get(
- video, lambda x: x[container][item], expected_type)
-
- def get_string(container, item):
- return get(container, item, compat_str)
-
- info.update({
- 'series': get_string('program', 'title'),
- 'season': get_string('season', 'title'),
- 'season_number': int_or_none(get('season', 'number')),
- 'season_id': get_string('season', 'id'),
- 'episode': get_string('episode', 'title'),
- 'episode_number': int_or_none(get('episode', 'number')),
- 'episode_id': get_string('episode', 'id'),
- 'duration': int_or_none(
- video.get('duration')) or int_or_none(
- video.get('durationMillis'), scale=1000),
- 'title': get_string('episode', 'title'),
- 'description': get_string('episode', 'text'),
- 'timestamp': unified_timestamp(get_string(
- 'publication', 'begin')),
- })
-
- if not info.get('title'):
- info['title'] = try_get(
- config, lambda x: x['videoConfig']['title'],
- compat_str) or self._html_search_regex(
- r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
- default=None) or self._og_search_title(webpage)
-
- if not info.get('description'):
- info['description'] = self._html_search_regex(
- r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
- webpage, 'description', default=None)
-
- return info
+ return {
+ 'id': production_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': production.get('posterUrl'),
+ 'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
+ 'duration': int_or_none(production.get('duration')) or None,
+ }
diff --git a/youtube_dl/extractor/vtm.py b/youtube_dl/extractor/vtm.py
new file mode 100644
index 000000000..093f1aa69
--- /dev/null
+++ b/youtube_dl/extractor/vtm.py
@@ -0,0 +1,62 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class VTMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
+ _TEST = {
+ 'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
+ 'md5': '37dca85fbc3a33f2de28ceb834b071f8',
+ 'info_dict': {
+ 'id': '192445',
+ 'ext': 'mp4',
+ 'title': 'Gast vernielt Genkse hotelkamer',
+ 'timestamp': 1611060180,
+ 'upload_date': '20210119',
+ 'duration': 74,
+ # TODO: fix url _type result processing
+ # 'series': 'Op Interventie',
+ }
+ }
+
+ def _real_extract(self, url):
+ uuid = self._match_id(url)
+ video = self._download_json(
+ 'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
+ uuid, query={
+ 'query': '''{
+ getComponent(type: Video, uuid: "%s") {
+ ... on Video {
+ description
+ duration
+ myChannelsVideo
+ program {
+ title
+ }
+ publishedAt
+ title
+ }
+ }
+}''' % uuid,
+ }, headers={
+ 'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
+ })['data']['getComponent']
+
+ return {
+ '_type': 'url',
+ 'id': uuid,
+ 'title': video.get('title'),
+ 'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
+ 'description': video.get('description'),
+ 'timestamp': parse_iso8601(video.get('publishedAt')),
+ 'duration': int_or_none(video.get('duration')),
+ 'series': try_get(video, lambda x: x['program']['title']),
+ 'ie_key': 'Medialaan',
+ }