From f063a04f079f7af0078a02da39586b5e71a6c0b1 Mon Sep 17 00:00:00 2001
From: Magnus Kolstad
Date: Fri, 5 Sep 2014 11:24:30 +0200
Subject: [PATCH] [dbtv] Add new extractor

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/dbtv.py     | 96 ++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 youtube_dl/extractor/dbtv.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e49ac3e527..c43dfd7ea1 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -62,6 +62,7 @@ from .dailymotion import (
     DailymotionUserIE,
 )
 from .daum import DaumIE
+from .dbtv import DBTVIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .drtv import DRTVIE
diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py
new file mode 100644
index 0000000000..cf76dbf053
--- /dev/null
+++ b/youtube_dl/extractor/dbtv.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+
+class DBTVIE(InfoExtractor):
+    """Information extractor for videos hosted on dbtv.no."""
+
+    # The numeric path component is the video id. Whatever follows it (an
+    # optional slash and/or a "#Title" fragment) is matched but not used.
+    _VALID_URL = r'http://dbtv\.no/(?P<id>[0-9]+)/?(?P<display_id>.*)$'
+    _TEST = {
+        'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+        'md5': 'b89953ed25dacb6edb3ef6c6f430f8bc',
+        'info_dict': {
+            'id': '3649835190001',
+            'ext': 'mp4',
+            'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
+            'description': 'md5:d681bf2bb7dd3503892cedb9c2d0e6f2',
+            'thumbnail': 'http://gfx.dbtv.no/thumbs/still/33100.jpg',
+            'timestamp': 1404039863,
+            'upload_date': '20140629',
+            'duration': 69544,
+        }
+    }
+
+    # Pixel width of the rendition preferred over the default video URL.
+    _PREFERRED_WIDTH = 1280
+
+    def _real_extract(self, url):
+        """Build the info dict for the dbtv.no video addressed by *url*.
+
+        Raises ExtractorError when the API response carries no download URL.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # Download the JSON document describing the video.
+        data = self._download_json(
+            'http://api.dbtv.no/discovery/%s' % video_id,
+            video_id, 'Downloading media JSON')
+        # Only the first playlist entry is used.
+        video = data['playlist'][0]
+
+        video_url = self._select_video_url(video)
+        if not video_url:
+            raise ExtractorError(
+                'No download URL found for video: %s.' % video_id,
+                expected=True)
+
+        # publishedAt is given in milliseconds; the timestamp is in seconds.
+        published_at = video.get('publishedAt')
+        timestamp = None
+        if published_at is not None:
+            timestamp = int(published_at) // 1000
+
+        # NOTE(review): 'length' is passed through unscaled; the test above
+        # expects 69544, which looks like milliseconds -- confirm the unit.
+        length = video.get('length')
+        duration = int(length) if length is not None else None
+
+        return {
+            'id': video_id,
+            'title': video['title'],
+            'description': video.get('desc'),
+            'thumbnail': video.get('splash'),
+            'timestamp': timestamp,
+            'duration': duration,
+            'view_count': video.get('views'),
+            'formats': [{
+                'url': video_url,
+                'format_id': 'mp4',
+                'ext': 'mp4',
+            }],
+        }
+
+    def _select_video_url(self, video):
+        """Return the URL of the 1280 px wide rendition, else the default URL.
+
+        Returns None when neither is present.
+        """
+        for rendition in video.get('renditions') or []:
+            try:
+                width = int(rendition.get('width'))
+            except (TypeError, ValueError):
+                # Rendition without a usable width: cannot be matched.
+                continue
+            if width == self._PREFERRED_WIDTH and rendition.get('URL'):
+                return rendition['URL']
+        return video.get('URL')