1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-12-15 10:05:24 +01:00
yt-dlp/youtube_dl/extractor/hypem.py
2013-11-03 14:06:47 +01:00

72 lines
2.4 KiB
Python

import json
import re
import time
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urllib_parse,
compat_urllib_request,
ExtractorError,
)
class HypemIE(InfoExtractor):
"""Information Extractor for hypem"""
_VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
_TEST = {
u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
u'file': u'1v6ga.mp3',
u'md5': u'b9cc91b5af8995e9f0c1cee04c575828',
u'info_dict': {
u"title": u"Tame"
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
track_id = mobj.group(1)
data = {'ax': 1, 'ts': time.time()}
data_encoded = compat_urllib_parse.urlencode(data)
complete_url = url + "?" + data_encoded
request = compat_urllib_request.Request(complete_url)
response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
cookie = urlh.headers.get('Set-Cookie', '')
self.report_extraction(track_id)
html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>',
response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
try:
track_list = json.loads(html_tracks)
track = track_list[u'tracks'][0]
except ValueError:
raise ExtractorError(u'Hypemachine contained invalid JSON.')
key = track[u"key"]
track_id = track[u"id"]
artist = track[u"artist"]
title = track[u"song"]
serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
request.add_header('cookie', cookie)
song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
try:
song_data = json.loads(song_data_json)
except ValueError:
raise ExtractorError(u'Hypemachine contained invalid JSON.')
final_url = song_data[u"url"]
return [{
'id': track_id,
'url': final_url,
'ext': "mp3",
'title': title,
'artist': artist,
}]