From 10677ece81b7ed05bb84a0dbaf5bd237107eeb62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 23 Jan 2016 00:04:33 +0600 Subject: [PATCH] [nuevo] Simplify nuevo extractors (Closes #7728) --- youtube_dl/extractor/anitube.py | 9 ++--- youtube_dl/extractor/nuevo.py | 25 +++++++------- youtube_dl/extractor/trollvids.py | 55 ++++++++++++------------------- youtube_dl/extractor/trutube.py | 13 +++----- 4 files changed, 41 insertions(+), 61 deletions(-) diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 73690df82..2fd912da4 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import re - from .nuevo import NuevoBaseIE @@ -22,12 +20,11 @@ class AnitubeIE(NuevoBaseIE): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) key = self._search_regex( r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key') - config_url = 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key - return self._extract_nuevo(config_url, video_id) + return self._extract_nuevo( + 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id) diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dl/extractor/nuevo.py index ccc697e4f..225da033c 100644 --- a/youtube_dl/extractor/nuevo.py +++ b/youtube_dl/extractor/nuevo.py @@ -11,22 +11,23 @@ from ..utils import ( class NuevoBaseIE(InfoExtractor): def _extract_nuevo(self, config_url, video_id): - tree = self._download_xml(config_url, video_id, transform_source=lambda s: s.strip()) + config = self._download_xml( + config_url, video_id, transform_source=lambda s: s.strip()) - title = xpath_text(tree, './title') - if title: - title = title.strip() - - thumbnail = xpath_text(tree, './image') - duration = float_or_none(xpath_text(tree, './duration')) + title = 
xpath_text(config, './title', 'title', fatal=True).strip() + video_id = xpath_text(config, './mediaid', default=video_id) + thumbnail = xpath_text(config, './image') + duration = float_or_none(xpath_text(config, './duration')) formats = [] for element_name, format_id in (('file', 'sd'), ('filehd', 'hd')): - video_url = tree.find(element_name) - video_url is None or formats.append({ - 'format_id': format_id, - 'url': video_url.text - }) + video_url = xpath_text(config, element_name) + if video_url: + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) + self._check_formats(formats, video_id) return { 'id': video_id, diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py index e4fe620f7..d239949a6 100644 --- a/youtube_dl/extractor/trollvids.py +++ b/youtube_dl/extractor/trollvids.py @@ -1,49 +1,36 @@ # encoding: utf-8 from __future__ import unicode_literals -from .nuevo import NuevoBaseIE - -from ..compat import ( - compat_urllib_parse_unquote -) - import re +from .nuevo import NuevoBaseIE + class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r'http://(?:www\.)?trollvids\.com/+video/+(?P<id>[0-9]+)/+(?P<title>[^?&]+)' + _VALID_URL = r'http://(?:www\.)?trollvids\.com/video/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' IE_NAME = 'trollvids' + _TEST = { + 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 'info_dict': { + 'id': '2349002', + 'ext': 'mp4', + 'title': '【MMD R-18】ガールフレンド carry_me_off', + 'age_limit': 18, + 'duration': 216.78, + }, + } def _real_extract(self, url): - match = re.match(self._VALID_URL, url) - - video_id = match.group('id') - raw_video_title = match.group('title') - url = 'http://trollvids.com/video/%s/%s' % (video_id, raw_video_title) - config_url = 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id - - info = self._extract_nuevo(config_url, video_id) + mobj = 
re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + info = self._extract_nuevo( + 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id, + video_id) info.update({ - 'webpage_url': url, + 'display_id': display_id, 'age_limit': 18 }) - - if 'title' not in info: - info['title'] = compat_urllib_parse_unquote(raw_video_title) - return info - - _TESTS = [ - { - 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', - 'info_dict': { - 'id': '2349002', - 'ext': 'mp4', - 'title': "【MMD R-18】ガールフレンド carry_me_off", - 'age_limit': 18, - 'duration': 216.78, - }, - }, - ] diff --git a/youtube_dl/extractor/trutube.py b/youtube_dl/extractor/trutube.py index d7ec2ec26..d55e0c563 100644 --- a/youtube_dl/extractor/trutube.py +++ b/youtube_dl/extractor/trutube.py @@ -4,7 +4,7 @@ from .nuevo import NuevoBaseIE class TruTubeIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?trutube\.tv/(?:video/|nuevo/player/embed\.php\?v=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-', 'md5': 'c5b6e301b0a2040b074746cbeaa26ca1', @@ -21,11 +21,6 @@ class TruTubeIE(NuevoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - config_url = 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id - - info = self._extract_nuevo(config_url, video_id) - - # filehd always 404s - info['formats'] = info['formats'][:1] - - return info + return self._extract_nuevo( + 'https://trutube.tv/nuevo/player/config.php?v=%s' % video_id, + video_id)