# Provenance (kept from the git patch this module was delivered in):
#   Commit:  3bf5705316adb4e486ec76a9308198b499787947
#   Author:  Philipp Hagemeister
#   Date:    Thu, 19 Feb 2015 01:43:20 +0100
#   Subject: [PATCH] [imgur] Add new extractor
#
# The same patch also registers the extractor in
# youtube_dl/extractor/__init__.py by adding
#     from .imgur import ImgurIE
# between the existing `.imdb` and `.ina` imports.
#
# NOTE(review): the text this module was recovered from had every
# `<...>` span stripped out.  Regex fragments marked "reconstructed" below
# were rebuilt from how their results are used; confirm them against the
# upstream youtube_dl/extractor/imgur.py before relying on them.
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    js_to_json,
    mimetype2ext,
)


class ImgurIE(InfoExtractor):
    """Extractor for direct ``i.imgur.com`` ``.mp4`` / ``.gifv`` links.

    One format is emitted per video source found in the page, plus an
    optional low-preference GIF format when the page exposes a
    ``videoItem`` JavaScript object.
    """

    # The `id` group is required: _real_extract() obtains the video id via
    # self._match_id(url).
    _VALID_URL = r'https?://i\.imgur\.com/(?P<id>[a-zA-Z0-9]+)\.(?:mp4|gifv)'

    _TESTS = [{
        'url': 'https://i.imgur.com/A61SaA1.gifv',
        'info_dict': {
            'id': 'A61SaA1',
            'ext': 'mp4',
            'title': 'MRW gifv is up and running without any bugs',
            'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single imgur video page.

        :param url: page URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``formats``, ``description`` and
            ``title`` keys.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Dimensions are optional metadata shared by every format below.
        # NOTE(review): both patterns are reconstructed -- confirm upstream.
        width = int_or_none(self._search_regex(
            r'<param name="width" value="([0-9]+)"',
            webpage, 'width', fatal=False))
        height = int_or_none(self._search_regex(
            r'<param name="height" value="([0-9]+)"',
            webpage, 'height', fatal=False))

        # NOTE(review): the container tag in this pattern is reconstructed;
        # only `(.*?)` and the 'video elements' label survived in the
        # source text -- confirm upstream.
        video_elements = self._search_regex(
            r'(?s)<div class="video-elements">(.*?)</div>',
            webpage, 'video elements')

        formats = []
        # One format per source entry; the `src` and `type` groups are read
        # by name below.
        # NOTE(review): the `<source\s+src="` prefix is reconstructed.
        for m in re.finditer(
                r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"',
                video_elements):
            formats.append({
                # The part after the slash of the MIME type doubles as the
                # format id.
                'format_id': m.group('type').partition('/')[2],
                'url': self._proto_relative_url(m.group('src')),
                'ext': mimetype2ext(m.group('type')),
                'acodec': 'none',
                'width': width,
                'height': height,
                'http_headers': {
                    'User-Agent': 'youtube-dl (like wget)',
                },
            })

        # The GIF variant is best-effort: a page without `videoItem` simply
        # yields no GIF format.
        gif_json = self._search_regex(
            r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
            webpage, 'GIF code', fatal=False)
        if gif_json:
            # The object is a JavaScript literal, so it is passed through
            # js_to_json before parsing.
            gifd = self._parse_json(
                gif_json, video_id, transform_source=js_to_json)
            formats.append({
                'format_id': 'gif',
                # Negative preference ranks the GIF below the video sources.
                'preference': -10,
                'width': width,
                'height': height,
                'ext': 'gif',
                'acodec': 'none',
                'vcodec': 'gif',
                'container': 'gif',
                'url': self._proto_relative_url(gifd['gifUrl']),
                'filesize': gifd.get('size'),
                'http_headers': {
                    'User-Agent': 'youtube-dl (like wget)',
                },
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'title': self._og_search_title(webpage),
        }