ytdl-nightly/youtube_dl/extractor/dailymotion.py
Ismael Mejia 953e32b2c1 [dailymotion] Added support for subtitles + new InfoExtractor for
generic subtitle download.

The idea is that all subtitle downloaders must descend from SubtitlesIE
and implement only three basic methods to achieve the complete subtitle
download functionality. This will allow to reduce the code in YoutubeIE
once it is rewritten.
2013-08-07 18:59:11 +02:00

135 lines
5.2 KiB
Python

import re
import json
import itertools
import socket
from .common import InfoExtractor
from .subtitles import SubtitlesIE
from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
compat_str,
get_element_by_attribute,
get_element_by_id,
ExtractorError,
)
class DailyMotionSubtitlesIE(SubtitlesIE):
def _get_available_subtitles(self, video_id):
request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
try:
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
return {}
info = json.loads(sub_list)
if (info['total'] > 0):
sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
return sub_lang_list
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}
def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
sub_lang_list = self._get_available_subtitles(video_id)
return sub_lang_list[sub_lang]
def _request_automatic_caption(self, video_id, webpage):
self._downloader.report_warning(u'Automatic Captions not supported by dailymotion')
return {}
class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
IE_NAME = u'dailymotion'
_TEST = {
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
u"uploader": u"Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
def _real_extract(self, url):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1).split('_')[0].split('?')[0]
video_extension = 'mp4'
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
webpage = self._download_webpage(request, video_id)
# Extract URL, uploader and title from webpage
self.report_extraction(video_id)
video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
# Looking for official user
r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
webpage, 'video uploader')
video_upload_date = None
mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
if mobj is not None:
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
info = json.loads(info)
# TODO: support choosing qualities
for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
'stream_h264_hq_url', 'stream_h264_url',
'stream_h264_ld_url']:
if info.get(key): # key in info and info[key]:
max_quality = key
self.to_screen(u'%s: Using %s' % (video_id, key))
break
else:
raise ExtractorError(u'Unable to extract video URL')
video_url = info[max_quality]
# subtitles
video_subtitles = None
video_webpage = None
if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_subtitles(video_id)
elif self._downloader.params.get('writeautomaticsub', False):
video_subtitles = self._request_automatic_caption(video_id, video_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
return
if 'length_seconds' not in info:
self._downloader.report_warning(u'unable to extract video duration')
video_duration = ''
else:
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
return [{
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'ext': video_extension,
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url']
}]