From b2fff30817848c1761a53d49fbe49aabd596089e Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 13 Apr 2017 12:27:28 +0100 Subject: [PATCH] [go90] Add new extractor(closes #10127) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/go90.py | 92 ++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 youtube_dl/extractor/go90.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 45591f2a7..1671090f4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -384,6 +384,7 @@ from .globo import ( GloboArticleIE, ) from .go import GoIE +from .go90 import Go90IE from .godtube import GodTubeIE from .godtv import GodTVIE from .golem import GolemIE diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py new file mode 100644 index 000000000..3550eca7c --- /dev/null +++ b/youtube_dl/extractor/go90.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, +) + + +class Go90IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P[0-9a-zA-Z]+)' + _TEST = { + 'url': 'https://www.go90.com/videos/84BUqjLpf9D', + 'md5': 'efa7670dbbbf21a7b07b360652b24a32', + 'info_dict': { + 'id': '84BUqjLpf9D', + 'ext': 'mp4', + 'title': 'Inside The Utah Coalition Against Pornography Convention', + 'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.', + 'timestamp': 1491868800, + 'upload_date': '20170411', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'https://www.go90.com/api/view/items/' + video_id, + video_id, headers={ + 'Content-Type': 'application/json; charset=utf-8', + }, data=b'{"client":"web","device_type":"pc"}') + title = video_data['title'] + main_video_asset = video_data['main_video_asset'] + + thumbnails = [] + formats = [] + for asset in video_data.get('assets'): + if asset.get('id') == main_video_asset: + for source in asset.get('sources', []): + source_location = source.get('location') + if not source_location: + continue + source_type = source.get('type') + if source_type == 'hls': + m3u8_formats = self._extract_m3u8_formats( + source_location, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + for f in m3u8_formats: + mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url']) + if mobj: + height, tbr = mobj.groups() + height = int_or_none(height) + f.update({ + 'height': f.get('height') or height, + 'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None), + 'tbr': f.get('tbr') or int_or_none(tbr), + }) + formats.extend(m3u8_formats) + elif source_type == 'dash': + formats.extend(self._extract_mpd_formats( + source_location, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'format_id': source.get('name'), + 'url': source_location, + 'width': int_or_none(source.get('width')), + 'height': int_or_none(source.get('height')), + 'tbr': int_or_none(source.get('bitrate')), + }) + elif asset.get('type') == 'image': + asset_location = asset.get('location') + if not asset_location: + continue + thumbnails.append({ + 'url': asset_location, + 'width': int_or_none(asset.get('width')), + 'height': int_or_none(asset.get('height')), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': video_data.get('short_description'), + 'like_count': int_or_none(video_data.get('like_count')), + 'timestamp': parse_iso8601(video_data.get('released_at')), + }