From 81953d1ae53bc5b3344243480316d751004a4d40 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Mon, 4 Jul 2016 17:57:44 +0100 Subject: [PATCH] [kaltura] add support videos stored on custom kaltura servers(closes #5557) --- youtube_dl/extractor/generic.py | 15 +++++++++++++ youtube_dl/extractor/kaltura.py | 39 ++++++++++++++++++++++++--------- youtube_dl/utils.py | 2 ++ 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a9b61bf13c..764697bd29 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1295,6 +1295,21 @@ class GenericIE(InfoExtractor): 'uploader': 'cylus cyrus', }, }, + { + # video stored on custom kaltura server + 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv', + 'md5': '537617d06e64dfed891fa1593c4b30cc', + 'info_dict': { + 'id': '0_1iotm5bh', + 'ext': 'mp4', + 'title': 'Elecciones británicas: 5 lecciones para Rajoy', + 'description': 'md5:435a89d68b9760b92ce67ed227055f16', + 'uploader_id': 'videos.expansion@el-mundo.net', + 'upload_date': '20150429', + 'timestamp': 1430303472, + }, + 'add_ie': ['Kaltura'], + }, ] def report_following_redirect(self, new_url): diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index c75a958ba8..147bb8cf08 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -6,7 +6,6 @@ import base64 from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_urlparse, compat_parse_qs, ) @@ -15,6 +14,7 @@ from ..utils import ( ExtractorError, int_or_none, unsmuggle_url, + smuggle_url, ) @@ -34,7 +34,8 @@ class KalturaIE(InfoExtractor): )(?:/(?P[^?]+))?(?:\?(?P.*))? ) ''' - _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?' + _SERVICE_URL = 'http://cdnapi.kaltura.com' + _SERVICE_BASE = '/api_v3/index.php' _TESTS = [ { 'url': 'kaltura:269692:1_1jc2y3e4', @@ -88,7 +89,14 @@ class KalturaIE(InfoExtractor): (?P["\'])(?P.+?)(?P=q3) ''', webpage)) if mobj: - return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict() + embed_info = mobj.groupdict() + url = 'kaltura:%(partner_id)s:%(id)s' % embed_info + service_url = re.search( + ']+src=(?:["\'])((?:https?:)?//.+?)/p/%(partner_id)s/sp/%(partner_id)s00/embedIframeJs' % embed_info, + webpage) + if service_url: + url = smuggle_url(url, {'service_url': service_url.group(1)}) + return url def _kaltura_api_call(self, video_id, actions, *args, **kwargs): params = actions[0] @@ -97,9 +105,9 @@ class KalturaIE(InfoExtractor): for k, v in a.items(): params['%d:%s' % (i, k)] = v - query = compat_urllib_parse_urlencode(params) - url = self._API_BASE + query - data = self._download_json(url, video_id, *args, **kwargs) + data = self._download_json( + self._SERVICE_URL + self._SERVICE_BASE, + video_id, query=params, *args, **kwargs) status = data if len(actions) == 1 else data[0] if status.get('objectType') == 'KalturaAPIException': @@ -148,6 +156,9 @@ class KalturaIE(InfoExtractor): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) + service_url = smuggled_data.get('service_url') + if service_url: + self._SERVICE_URL = service_url mobj = re.match(self._VALID_URL, url) partner_id, entry_id = mobj.group('partner_id', 'id') @@ -201,12 +212,17 @@ class KalturaIE(InfoExtractor): unsigned_url += '?referrer=%s' % referrer return unsigned_url + data_url = info['dataUrl'] + if '/flvclipper/' in data_url: + data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url) + formats = [] for f in flavor_assets: # Continue if asset is not ready if f['status'] != 2: continue - video_url = sign_url('%s/flavorId/%s' % (info['dataUrl'], f['id'])) + video_url = sign_url( + '%s/flavorId/%s' % (data_url, f['id'])) formats.append({ 'format_id': '%(fileExt)s-%(bitrate)s' % f, 'ext': f.get('fileExt'), @@ -219,9 +235,12 @@ class KalturaIE(InfoExtractor): 'width': int_or_none(f.get('width')), 'url': video_url, }) - m3u8_url = sign_url(info['dataUrl'].replace('format/url', 'format/applehttp')) - formats.extend(self._extract_m3u8_formats( - m3u8_url, entry_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + if '/playManifest/' in data_url: + m3u8_url = sign_url(data_url.replace( + 'format/url', 'format/applehttp')) + formats.extend(self._extract_m3u8_formats( + m3u8_url, entry_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) self._check_formats(formats, entry_id) self._sort_formats(formats) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 495878a0e5..d302f39e47 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1444,6 +1444,8 @@ def shell_quote(args): def smuggle_url(url, data): """ Pass additional data in a URL for internal use. """ + url, idata = unsmuggle_url(url, {}) + data.update(idata) sdata = compat_urllib_parse_urlencode( {'__youtubedl_smuggle': json.dumps(data)}) return url + '#' + sdata