From d73ebac100c9f91acb002c4844ba67b73616322a Mon Sep 17 00:00:00 2001
From: Remita Amine
Date: Sat, 6 Aug 2016 11:18:14 +0100
Subject: [PATCH] [pokemon] Add new extractor(closes #10093)

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/pokemon.py    | 66 ++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 youtube_dl/extractor/pokemon.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index fec560ba3d..11b64eeaa1 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -636,6 +636,7 @@ from .pluralsight import (
     PluralsightCourseIE,
 )
 from .podomatic import PodomaticIE
+from .pokemon import PokemonIE
 from .polskieradio import PolskieRadioIE
 from .porn91 import Porn91IE
 from .pornhd import PornHdIE
diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py
new file mode 100644
index 0000000000..ce27f33e18
--- /dev/null
+++ b/youtube_dl/extractor/pokemon.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    int_or_none,
+)
+
+
+class PokemonIE(InfoExtractor):
+    """Extract episode videos embedded on pokemon.com.
+
+    The episode page carries its metadata as ``data-video-*`` attributes on
+    an HTML element; the media itself is handed over to the LimelightMedia
+    extractor through a ``url_transparent`` result.
+    """
+
+    # A URL carries either a 32-character media id (``play=<id>``) or an
+    # episode slug below ``/pokemon-episodes/``. _real_extract unpacks the
+    # groups positionally, so keep the order (id, display_id).
+    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/pokemon-episodes/(?P<display_id>[^/?#]+))'
+    _TESTS = [{
+        'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
+        'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
+        'info_dict': {
+            'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
+            'ext': 'mp4',
+            'title': 'From A to Z!',
+            'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
+            'timestamp': 1460478136,
+            'upload_date': '20160412',
+        },
+        # NOTE(review): possibly a typo for 'add_ie' - confirm against the download test harness.
+        'add_id': ['LimelightMedia']
+    }, {
+        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Build the url_transparent result for the episode page at ``url``."""
+        video_id, display_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, video_id or display_id)
+        # Find the element carrying data-video-id; when the URL only names
+        # an episode slug, any 32-character media id is accepted.
+        video_data = extract_attributes(self._search_regex(
+            r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
+            webpage, 'video data element'))
+        video_id = video_data['data-video-id']
+        title = video_data['data-video-title']
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': 'limelight:media:%s' % video_id,
+            'title': title,
+            'description': video_data.get('data-video-summary'),
+            'thumbnail': video_data.get('data-video-poster'),
+            'series': 'Pokémon',
+            'season_number': int_or_none(video_data.get('data-video-season')),
+            'episode': title,
+            'episode_number': int_or_none(video_data.get('data-video-episode')),
+            'ie_key': 'LimelightMedia',
+        }