ytdl-nightly/youtube_dl/extractor/escapist.py

from __future__ import unicode_literals

import json

from .common import InfoExtractor
from ..compat import compat_urllib_request

from ..utils import (
    determine_ext,
    clean_html,
    int_or_none,
)


def _decrypt_config(key, string):
    a = ''
    i = ''
    r = ''

    while len(a) < (len(string) / 2):
        a += key

    a = a[0:int(len(string) / 2)]

    t = 0
    while t < len(string):
        i += chr(int(string[t] + string[t + 1], 16))
        t += 2

    icko = [s for s in i]

    for t, c in enumerate(a):
        r += chr(ord(c) ^ ord(icko[t]))

    return r


class EscapistIE(InfoExtractor):
    """Extractor for video pages on escapistmagazine.com."""

    _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
        'md5': 'c6793dbda81388f4264c1ba18684a74d',
        'info_dict': {
            'id': '6618',
            'ext': 'mp4',
            'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
            'title': "Breaking Down Baldur's Gate",
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 264,
        }
    }, {
        'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
        'md5': 'cf8842a8a46444d241f9a9980d7874f2',
        'info_dict': {
            'id': '10044',
            'ext': 'mp4',
            'description': 'This week, Zero Punctuation reviews Evolve.',
            'title': 'Evolve - One vs Multiplayer',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 304,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at ``url``.

        Downloads the page, reads the ``imsVideo.play({...})`` call embedded
        in it, fetches and decrypts the matching video config, and returns a
        dict with id, formats, title, thumbnail, description and duration.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The JSON argument of the player call carries the video ID and the
        # hash; the hash is also the key used to decrypt the config below.
        ims_video = self._parse_json(
            self._search_regex(
                r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
            video_id)
        video_id = ims_video['videoID']
        key = ims_video['hash']

        config_req = compat_urllib_request.Request(
            'http://www.escapistmagazine.com/videos/'
            'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
        # NOTE(review): presumably the config endpoint checks the Referer;
        # confirm before removing.
        config_req.add_header('Referer', url)
        config = self._download_webpage(config_req, video_id, 'Downloading video config')

        data = json.loads(_decrypt_config(key, config))

        video_data = data['videoData']
        title = clean_html(video_data['title'])
        # The raw value is divided by 1000 (presumably milliseconds).
        # int_or_none keeps the result an integer on both Python 2 and 3
        # and tolerates a missing duration, which is optional metadata.
        duration = int_or_none(video_data.get('duration'), scale=1000)

        formats = [{
            'url': video['src'],
            'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
            'height': int_or_none(video.get('res')),
        } for video in data['files']['videos']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'duration': duration,
        }
[ecapist] modernize and fix id property 2014-02-13 16:32:42 +01:00			`from __future__ import unicode_literals`

[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`import json`

Move Escapist into its own file 2013-06-23 21:08:17 +02:00			`from .common import InfoExtractor`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`from ..compat import compat_urllib_request`

Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 12:24:42 +01:00			`from ..utils import (`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`determine_ext,`
			`clean_html,`
[escapist] Fix formats extraction 2015-05-04 14:59:22 +02:00			`int_or_none,`
Move Escapist into its own file 2013-06-23 21:08:17 +02:00			`)`


[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`def _decrypt_config(key, string):`
			`a = ''`
			`i = ''`
			`r = ''`

			`while len(a) < (len(string) / 2):`
			`a += key`

			`a = a[0:int(len(string) / 2)]`

			`t = 0`
			`while t < len(string):`
			`i += chr(int(string[t] + string[t + 1], 16))`
			`t += 2`

			`icko = [s for s in i]`

			`for t, c in enumerate(a):`
			`r += chr(ord(c) ^ ord(icko[t]))`

			`return r`


Move Escapist into its own file 2013-06-23 21:08:17 +02:00			`class EscapistIE(InfoExtractor):`
[escapist] Modernize 2015-02-10 15:45:36 +01:00			`_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$\|[?#])'`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`_TESTS = [{`
[ecapist] modernize and fix id property 2014-02-13 16:32:42 +01:00			`'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`'md5': 'c6793dbda81388f4264c1ba18684a74d',`
[ecapist] modernize and fix id property 2014-02-13 16:32:42 +01:00			`'info_dict': {`
			`'id': '6618',`
			`'ext': 'mp4',`
			`'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",`
			`'title': "Breaking Down Baldur's Gate",`
[escapist] Fix extraction (fixes #5017) 2015-02-20 23:22:47 +01:00			`'thumbnail': 're:^https?://.*\.jpg$',`
[escapist] Extract duration 2015-02-28 20:52:52 +01:00			`'duration': 264,`
Move tests to the IE definitions 2013-06-27 20:46:46 +02:00			`}`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`}, {`
			`'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',`
			`'md5': 'cf8842a8a46444d241f9a9980d7874f2',`
			`'info_dict': {`
			`'id': '10044',`
			`'ext': 'mp4',`
			`'description': 'This week, Zero Punctuation reviews Evolve.',`
			`'title': 'Evolve - One vs Multiplayer',`
			`'thumbnail': 're:^https?://.*\.jpg$',`
			`'duration': 304,`
			`}`
			`}]`
Move Escapist into its own file 2013-06-23 21:08:17 +02:00
			`def _real_extract(self, url):`
[escapist] Modernize 2015-02-10 15:45:36 +01:00			`video_id = self._match_id(url)`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`webpage = self._download_webpage(url, video_id)`

[escapist] Fix imsVideo regex (#5090) 2015-04-27 18:17:51 +02:00			`imsVideo = self._parse_json(`
			`self._search_regex(`
			`r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),`
			`video_id)`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`video_id = imsVideo['videoID']`
			`key = imsVideo['hash']`

[escapist] Fix formats extraction 2015-05-04 14:59:22 +02:00			`config_req = compat_urllib_request.Request(`
			`'http://www.escapistmagazine.com/videos/'`
			`'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))`
			`config_req.add_header('Referer', url)`
			`config = self._download_webpage(config_req, video_id, 'Downloading video config')`
Move Escapist into its own file 2013-06-23 21:08:17 +02:00
[escapist] Fix formats extraction 2015-05-04 14:59:22 +02:00			`data = json.loads(_decrypt_config(key, config))`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00
[escapist] Fix formats extraction 2015-05-04 14:59:22 +02:00			`title = clean_html(data['videoData']['title'])`
			`duration = data['videoData']['duration'] / 1000`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00
[escapist] Fix formats extraction 2015-05-04 14:59:22 +02:00			`formats = [{`
			`'url': video['src'],`
			`'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),`
			`'height': int_or_none(video.get('res')),`
			`} for video in data['files']['videos']]`
			`self._sort_formats(formats)`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00
			`return {`
[ecapist] modernize and fix id property 2014-02-13 16:32:42 +01:00			`'id': video_id,`
[escapist] Add support for HD format (Closes #1755) 2013-11-20 06:47:50 +01:00			`'formats': formats,`
Move Escapist into its own file 2013-06-23 21:08:17 +02:00			`'title': title,`
InfoExtractor: add some helper methods to extract OpenGraph info 2013-07-12 19:00:19 +02:00			`'thumbnail': self._og_search_thumbnail(webpage),`
[escapist] Fix extractor (fixes #5090) 2015-04-27 17:44:13 +02:00			`'description': self._og_search_description(webpage),`
[escapist] Extract duration 2015-02-28 20:52:52 +01:00			`'duration': duration,`
Move Escapist into its own file 2013-06-23 21:08:17 +02:00			`}`