ytdl-nightly/youtube_dl/extractor/hypem.py

from __future__ import unicode_literals

import json
import re
import time

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,

    ExtractorError,
)


class HypemIE(InfoExtractor):
    """Information extractor for single-track pages on hypem.com.

    The track page embeds a JSON "display list" describing the track; a
    second request to the site's ``serve/source`` endpoint, carrying the
    cookie obtained from the first response, yields the media URL.
    """

    # group(1) is the short track id, group(2) the slug that follows it.
    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
    _TEST = {
        'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
        'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
        'info_dict': {
            'id': '1v6ga',
            'ext': 'mp3',
            'title': 'Tame',
            'uploader': 'BODYWORK',
        }
    }

    def _real_extract(self, url):
        """Resolve a hypem.com track page URL to a downloadable media URL.

        Args:
            url: A track page URL matching ``_VALID_URL``.

        Returns:
            A dict with the keys ``id``, ``url``, ``ext``, ``title`` and
            ``uploader`` describing the track.

        Raises:
            ExtractorError: If the embedded track list is not valid JSON or
                does not contain at least one track entry.
        """
        mobj = re.match(self._VALID_URL, url)
        # Id taken from the URL; used for progress output until the id
        # reported by the site itself is known (see below).
        track_id = mobj.group(1)

        # Query parameters appended to the page URL.
        # NOTE(review): presumably 'ax' requests the AJAX variant of the page
        # and 'ts' is a cache-busting timestamp -- confirm against the site.
        data = {'ax': 1, 'ts': time.time()}
        data_encoded = compat_urllib_parse.urlencode(data)
        complete_url = url + "?" + data_encoded
        request = compat_urllib_request.Request(complete_url)
        response, urlh = self._download_webpage_handle(
            request, track_id, 'Downloading webpage with the url')
        # The cookie set by this response is replayed on the metadata
        # request further down.
        cookie = urlh.headers.get('Set-Cookie', '')

        html_tracks = self._html_search_regex(
            r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',
            response, 'tracks')
        try:
            track_list = json.loads(html_tracks)
        except ValueError:
            raise ExtractorError('Hypemachine contained invalid JSON.')

        # Valid JSON may still lack the expected shape (no 'tracks' key, an
        # empty list, or a non-object top level); report that as an
        # extraction failure instead of leaking a raw lookup exception.
        try:
            track = track_list['tracks'][0]
        except (KeyError, IndexError, TypeError):
            raise ExtractorError('Hypemachine did not return any track data.')

        key = track['key']
        # From here on use the id reported in the track data.
        track_id = track['id']
        artist = track['artist']
        title = track['song']

        serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)
        # NOTE(review): the empty body argument presumably makes this a POST
        # (urllib semantics for a non-None body) -- confirm for the compat
        # wrapper in use.
        request = compat_urllib_request.Request(
            serve_url, '', {'Content-Type': 'application/json'})
        request.add_header('cookie', cookie)
        song_data = self._download_json(request, track_id, 'Downloading metadata')
        final_url = song_data["url"]

        return {
            'id': track_id,
            'url': final_url,
            'ext': 'mp3',
            'title': title,
            'uploader': artist,
        }
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`from __future__ import unicode_literals`

[hypem] Move into own file 2013-06-23 22:29:27 +02:00			`import json`
			`import re`
			`import time`

			`from .common import InfoExtractor`
			`from ..utils import (`
			`compat_urllib_parse,`
			`compat_urllib_request,`

			`ExtractorError,`
			`)`


			`class HypemIE(InfoExtractor):`
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`_VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'`
Move tests to the IE definitions 2013-06-27 20:46:46 +02:00			`_TEST = {`
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',`
			`'md5': 'b9cc91b5af8995e9f0c1cee04c575828',`
			`'info_dict': {`
			`'id': '1v6ga',`
			`'ext': 'mp3',`
			`'title': 'Tame',`
			`'uploader': 'BODYWORK',`
Move tests to the IE definitions 2013-06-27 20:46:46 +02:00			`}`
			`}`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`track_id = mobj.group(1)`

Style fixes for extractors: remove spaces around (,),{ and } 2013-11-03 14:03:17 +01:00			`data = {'ax': 1, 'ts': time.time()}`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00			`data_encoded = compat_urllib_parse.urlencode(data)`
			`complete_url = url + "?" + data_encoded`
			`request = compat_urllib_request.Request(complete_url)`
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`response, urlh = self._download_webpage_handle(`
			`request, track_id, 'Downloading webpage with the url')`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00			`cookie = urlh.headers.get('Set-Cookie', '')`

[hypem] Modernize 2014-06-09 22:30:57 +02:00			`html_tracks = self._html_search_regex(`
			`r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>',`
			`response, 'tracks')`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00			`try:`
			`track_list = json.loads(html_tracks)`
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`track = track_list['tracks'][0]`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00			`except ValueError:`
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`raise ExtractorError('Hypemachine contained invalid JSON.')`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`key = track['key']`
			`track_id = track['id']`
			`artist = track['artist']`
			`title = track['song']`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key)`
			`request = compat_urllib_request.Request(`
			`serve_url, '', {'Content-Type': 'application/json'})`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00			`request.add_header('cookie', cookie)`
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`song_data = self._download_json(request, track_id, 'Downloading metadata')`
			`final_url = song_data["url"]`
[hypem] Move into own file 2013-06-23 22:29:27 +02:00
[hypem] Modernize 2014-06-09 22:30:57 +02:00			`return {`
			`'id': track_id,`
			`'url': final_url,`
			`'ext': 'mp3',`
			`'title': title,`
			`'uploader': artist,`
			`}`