Merge remote-tracking branch 'dstftw/multifeed-videos' (closes #6360)

This commit is contained in:
Jaime Marquínez Ferrándiz 2015-07-29 21:55:20 +02:00
commit 34866b4836
1 changed file with 106 additions and 27 deletions

View File

@@ -33,9 +33,11 @@ from ..utils import (
int_or_none, int_or_none,
orderedSet, orderedSet,
parse_duration, parse_duration,
smuggle_url,
str_to_int, str_to_int,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unsmuggle_url,
uppercase_escape, uppercase_escape,
ISO3166Utils, ISO3166Utils,
) )
@@ -562,6 +564,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '135', # bestvideo 'format': '135', # bestvideo
} }
}, },
{
# Multifeed videos (multiple cameras), URL is for Main Camera
'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
'info_dict': {
'id': 'jqWvoWXjCVs',
'title': 'teamPGP: Rocket League Noob Stream',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
},
'playlist': [{
'info_dict': {
'id': 'jqWvoWXjCVs',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}, {
'info_dict': {
'id': '6h8e8xoXJzg',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}, {
'info_dict': {
'id': 'PUOgX5z9xZw',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}, {
'info_dict': {
'id': 'teuwxikvS5k',
'ext': 'mp4',
'title': 'teamPGP: Rocket League Noob Stream (zim)',
'description': 'md5:dc7872fb300e143831327f1bae3af010',
'upload_date': '20150721',
'uploader': 'Beer Games Beer',
'uploader_id': 'beergamesbeer',
},
}],
'params': {
'skip_download': True,
},
}
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@@ -893,6 +948,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return formats return formats
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
proto = ( proto = (
'http' if self._downloader.params.get('prefer_insecure', False) 'http' if self._downloader.params.get('prefer_insecure', False)
else 'https') else 'https')
@@ -1009,6 +1066,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'"token" parameter not in video info for unknown reason', '"token" parameter not in video info for unknown reason',
video_id=video_id) video_id=video_id)
# title
if 'title' in video_info:
video_title = video_info['title'][0]
else:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'
# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
title="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
class="yt-uix-redirect-link"\s*>
[^<]+
</a>
''', r'\1', video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
if fd_mobj:
video_description = unescapeHTML(fd_mobj.group(1))
else:
video_description = ''
if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
if not self._downloader.params.get('noplaylist'):
entries = []
feed_ids = []
multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
for feed in multifeed_metadata_list.split(','):
feed_data = compat_parse_qs(feed)
entries.append({
'_type': 'url_transparent',
'ie_key': 'Youtube',
'url': smuggle_url(
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
{'force_singlefeed': True}),
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
})
feed_ids.append(feed_data['id'][0])
self.to_screen(
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
% (', '.join(feed_ids), video_id))
return self.playlist_result(entries, video_id, video_title, video_description)
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
if 'view_count' in video_info: if 'view_count' in video_info:
view_count = int(video_info['view_count'][0]) view_count = int(video_info['view_count'][0])
else: else:
@@ -1034,13 +1140,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
self._downloader.report_warning('unable to extract uploader nickname') self._downloader.report_warning('unable to extract uploader nickname')
# title
if 'title' in video_info:
video_title = video_info['title'][0]
else:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'
# thumbnail image # thumbnail image
# We try first to get a high quality image: # We try first to get a high quality image:
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">', m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@ -1080,26 +1179,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
unescapeHTML(m.group('content')) unescapeHTML(m.group('content'))
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
title="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
class="yt-uix-redirect-link"\s*>
[^<]+
</a>
''', r'\1', video_description)
video_description = clean_html(video_description)
else:
fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
if fd_mobj:
video_description = unescapeHTML(fd_mobj.group(1))
else:
video_description = ''
def _extract_count(count_name): def _extract_count(count_name):
return str_to_int(self._search_regex( return str_to_int(self._search_regex(
r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'