Merge remote-tracking branch 'gabeos/crunchyroll-show-playlist'

This commit is contained in:
Philipp Hagemeister 2014-10-26 17:06:35 +01:00
commit 274b12b5a8
2 changed files with 40 additions and 1 deletions

View File

@ -62,7 +62,10 @@ from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE from .crunchyroll import (
CrunchyrollIE,
CrunchyrollShowPlaylistIE
)
from .cspan import CSpanIE from .cspan import CSpanIE
from .d8 import D8IE from .d8 import D8IE
from .dailymotion import ( from .dailymotion import (

View File

@ -24,6 +24,7 @@ from ..aes import (
aes_cbc_decrypt, aes_cbc_decrypt,
inc, inc,
) )
from .common import InfoExtractor
class CrunchyrollIE(SubtitlesInfoExtractor): class CrunchyrollIE(SubtitlesInfoExtractor):
@ -288,3 +289,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
} }
class CrunchyrollShowPlaylistIE(InfoExtractor):
    """Turn a Crunchyroll show page into a playlist of its episode URLs.

    The show page is downloaded once; the show title and every episode link
    found in it are returned as a ``playlist`` result whose entries are
    produced by ``self.url_result``.
    """

    IE_NAME = "crunchyroll:playlist"
    # Accepts only a bare show URL: a single path component, optionally
    # followed by one slash.  The negative lookahead rejects any path that
    # *begins with* one of the listed site-section names.
    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<show>[\w\-]+))/?$'
    # Capture everything up to the next tag.  A [\w\s]+ capture would stop at
    # the first punctuation character and truncate titles such as
    # "Fate/stay night" or "Show - Subtitle".
    _TITLE_EXTR = r'<span\s+itemprop="name">\s*(?P<showtitle>[^<]+)'
    _TESTS = [{
        'url': 'http://www.crunchyroll.com/attack-on-titan',
        'info_dict': {
            'title': 'Attack on Titan'
        },
        'playlist_count': 15
    }]

    def _extract_title_entries(self, id, webpage):
        """Return ``(title, entries)`` parsed from a show page.

        ``id`` is the show's URL slug (the name shadows the builtin but is
        kept so existing callers are unaffected) and ``webpage`` is the show
        page's HTML.  ``entries`` holds one ``self.url_result(...)`` item per
        episode link found, in the reverse of the order in which the links
        appear in the page.
        """
        # The slug is escaped so that it is always matched literally,
        # whatever characters a caller passes in.  The (?P=vidid)
        # backreference requires the episode path to end with the same
        # numeric id that appears in the element's id attribute.
        episode_re = (
            r'id="showview_videos_media_(?P<vidid>\d+)".*?'
            r'href="/{0}/(?P<vidurl>[\w\-]+-(?P=vidid))"'
        ).format(re.escape(id))

        title = self._html_search_regex(
            self._TITLE_EXTR, webpage, "title",
            flags=re.UNICODE | re.MULTILINE)

        # With two groups in the pattern, findall yields (vidid, vidurl)
        # tuples; only the path component is needed to build the URL.
        matches = re.findall(
            episode_re, webpage, re.UNICODE | re.MULTILINE | re.DOTALL)
        entries = [
            self.url_result(
                'http://www.crunchyroll.com/{0}/{1}'.format(id, vidurl))
            for _vidid, vidurl in reversed(matches)
        ]
        return title, entries

    def _real_extract(self, url):
        """Download the show page at ``url`` and return its playlist dict."""
        show_id = re.match(self._VALID_URL, url).group('show')
        webpage = self._download_webpage(url, show_id)
        title, entries = self._extract_title_entries(show_id, webpage)
        return {
            '_type': 'playlist',
            'id': show_id,
            'title': title,
            'entries': entries,
        }