diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index cc818fcc1d..bffb6d115b 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,6 +1,7 @@
 
 from .ard import ARDIE
 from .arte import ArteTvIE
+from .auengine import AuengineIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
@@ -138,6 +139,7 @@ def gen_extractors():
         CSpanIE(),
         WimpIE(),
         HotNewHipHopIE(),
+        AuengineIE(),
         GenericIE()
     ]
 
diff --git a/youtube_dl/extractor/auengine.py b/youtube_dl/extractor/auengine.py
new file mode 100644
--- /dev/null
+++ b/youtube_dl/extractor/auengine.py
@@ -0,0 +1,65 @@
+import os.path
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    compat_urllib_parse_urlparse,
+    ExtractorError,
+)
+
+
+class AuengineIE(InfoExtractor):
+    """Information extractor for auengine.com ``embed.php`` pages."""
+
+    # Group 1 captures the ``file`` query parameter, used as the video id.
+    _VALID_URL = r'(?:http://)?(?:www\.)?auengine\.com/embed\.php\?.*?file=([^&]+).*?'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for ``url``.
+
+        The page fetched with ``_download_webpage`` is scanned for quoted
+        ``file:``/``url:`` values. The last link whose path ends in ``.mp4``
+        is used as the media URL and the last one ending in ``.png`` as the
+        thumbnail (``None`` when the page has no ``.png`` link).
+
+        Raises ExtractorError when the page contains no ``.mp4`` link.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(
+            r'<title>(?P<title>.+?)</title>', webpage, u'title')
+        title = title.strip()
+
+        links = re.findall(
+            r'[^A-Za-z0-9]?(?:file|url):\s*["\'](http[^\'"&]*)', webpage)
+        links = [compat_urllib_parse.unquote(l) for l in links]
+
+        # Start from "not found" so a page without a matching link cannot
+        # leave these names unbound.
+        video_url = None
+        ext = None
+        thumbnail = None
+        for link in links:
+            pathext = os.path.splitext(
+                compat_urllib_parse_urlparse(link).path)[1]
+            if pathext == '.png':
+                thumbnail = link
+            elif pathext == '.mp4':
+                video_url = link
+                ext = pathext
+        if video_url is None:
+            raise ExtractorError(u'Unable to find a video URL')
+
+        # The page title may end with the file extension; drop it.
+        if title.endswith(ext):
+            title = title[:-len(ext)]
+
+        return [{
+            'id': video_id,
+            'url': video_url,
+            'ext': ext[1:],  # without the leading dot
+            'title': title,
+            'thumbnail': thumbnail,
+        }]