Merge pull request #3709 from naglis/hostingbulk

[hostingbulk] Add new extractor
2014-09-10 19:04:14 +07:00 · 2014-09-10 19:04:14 +07:00 · 136c8bd275
parent 1bf5423e82 91ebb17ede
commit 136c8bd275
2 changed files with 89 additions and 0 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -140,6 +140,7 @@ from .hark import HarkIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
 from .hornbunny import HornBunnyIE
 from .hostingbulk import HostingBulkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
--- a/youtube_dl/extractor/hostingbulk.py
+++ b/youtube_dl/extractor/hostingbulk.py
@ -0,0 +1,88 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    compat_urllib_request,
    int_or_none,
    urlencode_postdata,
 )
 class HostingBulkIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?hostingbulk\.com/
        (?:embed-)?(?P<id>[A-Za-z0-9]{12})(?:-\d+x\d+)?\.html'''
    _FILE_DELETED_REGEX = r'<b>File Not Found</b>'
    _TEST = {
        'url': 'http://hostingbulk.com/n0ulw1hv20fm.html',
        'md5': '6c8653c8ecf7ebfa83b76e24b7b2fe3f',
        'info_dict': {
            'id': 'n0ulw1hv20fm',
            'ext': 'mp4',
            'title': 'md5:5afeba33f48ec87219c269e054afd622',
            'filesize': 6816081,
            'thumbnail': 're:^http://.*\.jpg$',
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        url = 'http://hostingbulk.com/{0:}.html'.format(video_id)
        # Custom request with cookie to set language to English, so our file
        # deleted regex would work.
        request = compat_urllib_request.Request(
            url, headers={'Cookie': 'lang=english'})
        webpage = self._download_webpage(request, video_id)
        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)
        title = self._html_search_regex(r'<h3>(.*?)</h3>', webpage, 'title')
        filesize = int_or_none(
            self._search_regex(
                r'<small>\((\d+)\sbytes?\)</small>',
                webpage,
                'filesize',
                fatal=False
            )
        )
        thumbnail = self._search_regex(
            r'<img src="([^"]+)".+?class="pic"',
            webpage, 'thumbnail', fatal=False)
        rand = self._search_regex(
            r'<input.+?name="rand" value="([^"]+)">', webpage, 'rand')
        fields = {
            'id': video_id,
            'method_free': '',
            'method_premium': '',
            'op': 'download2',
            'rand': rand,
            'referer': '',
        }
        request = compat_urllib_request.Request(url, urlencode_postdata(fields))
        request.add_header('Content-type', 'application/x-www-form-urlencoded')
        response = self._request_webpage(request, video_id,
                                         'Submiting download request')
        video_url = response.geturl()
        formats = [{
            'format_id': 'sd',
            'filesize': filesize,
            'url': video_url,
        }]
        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }