From 43f0537c06384b9b97235a93ea39649ee3de4d45 Mon Sep 17 00:00:00 2001
From: hassaanaliw
Date: Wed, 16 Jul 2014 18:45:42 +0500
Subject: [PATCH] [cracked] Add new extractor

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/cracked.py  | 58 ++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)
 create mode 100644 youtube_dl/extractor/cracked.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -52,6 +52,7 @@ from .cnn import (
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .condenast import CondeNastIE
+from .cracked import CrackedIE
 from .criterion import CriterionIE
 from .crunchyroll import CrunchyrollIE
 from .cspan import CSpanIE
diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py
new file mode 100644
--- /dev/null
+++ b/youtube_dl/extractor/cracked.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+def _int_or_none(value):
+    """Return int(value), or None if value is missing or not numeric."""
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return None
+
+
+class CrackedIE(InfoExtractor):
+    """Information extractor for cracked.com video pages."""
+
+    _VALID_URL = r'https?://(?:[\w.-]+\.)?cracked\.com/video_(?P<id>\d+)_.*'
+    _TEST = {
+        'url': 'http://www.cracked.com/video_18803_4-social-criticisms-hidden-in-sonic-hedgehog-games.html',
+        'info_dict': {
+            'id': '18803',
+            'ext': 'mp4',
+            'title': "4 Social Criticisms Hidden in 'Sonic the Hedgehog' Games | Cracked.com",
+            'height': 375,
+            'width': 666,
+        },
+    }
+
+    def _real_extract(self, url):
+        """Download the page at url and return the video's info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._search_regex(
+            r'<title>(.*?)</title>', webpage, 'title')
+        video_url = self._search_regex(
+            r'var CK_vidSrc = "([^"]+)"', webpage, 'url')
+
+        # Dimensions are optional metadata: a page without them must not
+        # abort the extraction, so missing values become None.
+        width = self._search_regex(
+            r'width="(.*?)"', webpage, 'width', default=None)
+        # The second height attribute on the page is the one that is used.
+        heights = re.findall(r'height="(.*?)"', webpage)
+        height = heights[1] if len(heights) > 1 else None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'height': _int_or_none(height),
+            'width': _int_or_none(width),
+        }