ytdl-nightly/youtube_dl/extractor/egghead.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    determine_ext,
    int_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
)


class EggheadCourseIE(InfoExtractor):
    IE_DESC = 'egghead.io course'
    IE_NAME = 'egghead:course'
    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
        'playlist_count': 29,
        'info_dict': {
            'id': '72',
            'title': 'Professor Frisby Introduces Composable Functional JavaScript',
            'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
        },
    }

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        lessons = self._download_json(
            'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
            playlist_id, 'Downloading course lessons JSON')

        entries = []
        for lesson in lessons:
            lesson_url = url_or_none(lesson.get('http_url'))
            if not lesson_url:
                continue
            lesson_id = lesson.get('id')
            if lesson_id:
                lesson_id = compat_str(lesson_id)
            entries.append(self.url_result(
                lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))

        course = self._download_json(
            'https://egghead.io/api/v1/series/%s' % playlist_id,
            playlist_id, 'Downloading course JSON', fatal=False) or {}

        playlist_id = course.get('id')
        if playlist_id:
            playlist_id = compat_str(playlist_id)

        return self.playlist_result(
            entries, playlist_id, course.get('title'),
            course.get('description'))


class EggheadLessonIE(InfoExtractor):
    IE_DESC = 'egghead.io lesson'
    IE_NAME = 'egghead:lesson'
    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
        'info_dict': {
            'id': '1196',
            'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
            'ext': 'mp4',
            'title': 'Create linear data flow with container style types (Box)',
            'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
            'thumbnail': r're:^https?:.*\.jpg$',
            'timestamp': 1481296768,
            'upload_date': '20161209',
            'duration': 304,
            'view_count': 0,
            'tags': ['javascript', 'free'],
        },
        'params': {
            'skip_download': True,
            'format': 'bestvideo',
        },
    }, {
        'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        lesson = self._download_json(
            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)

        lesson_id = compat_str(lesson['id'])
        title = lesson['title']

        formats = []
        for _, format_url in lesson['media_urls'].items():
            format_url = url_or_none(format_url)
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, lesson_id, 'mp4', entry_protocol='m3u8',
                    m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    format_url, lesson_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': format_url,
                })
        self._sort_formats(formats)

        return {
            'id': lesson_id,
            'display_id': display_id,
            'title': title,
            'description': lesson.get('summary'),
            'thumbnail': lesson.get('thumb_nail'),
            'timestamp': unified_timestamp(lesson.get('published_at')),
            'duration': int_or_none(lesson.get('duration')),
            'view_count': int_or_none(lesson.get('plays_count')),
            'tags': try_get(lesson, lambda x: x['tag_list'], list),
            'series': try_get(
                lesson, lambda x: x['series']['title'], compat_str),
            'formats': formats,
        }
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`from ..compat import compat_str`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`from ..utils import (`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`determine_ext,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`int_or_none,`
			`try_get,`
			`unified_timestamp,`
Improve URL extraction 2018-07-21 19:08:28 +07:00			`url_or_none,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`)`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00

			`class EggheadCourseIE(InfoExtractor):`
			`IE_DESC = 'egghead.io course'`
			`IE_NAME = 'egghead:course'`
[egghead:course] Improve (closes #13370) 2017-07-09 17:28:42 +07:00			`_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00			`_TEST = {`
			`'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',`
			`'playlist_count': 29,`
			`'info_dict': {`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'id': '72',`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00			`'title': 'Professor Frisby Introduces Composable Functional JavaScript',`
			`'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',`
			`},`
			`}`

			`def _real_extract(self, url):`
			`playlist_id = self._match_id(url)`

[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`lessons = self._download_json(`
			`'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,`
			`playlist_id, 'Downloading course lessons JSON')`

			`entries = []`
			`for lesson in lessons:`
Improve URL extraction 2018-07-21 19:08:28 +07:00			`lesson_url = url_or_none(lesson.get('http_url'))`
			`if not lesson_url:`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`continue`
			`lesson_id = lesson.get('id')`
			`if lesson_id:`
			`lesson_id = compat_str(lesson_id)`
			`entries.append(self.url_result(`
			`lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))`

[egghead:course] Improve (closes #13370) 2017-07-09 17:28:42 +07:00			`course = self._download_json(`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'https://egghead.io/api/v1/series/%s' % playlist_id,`
			`playlist_id, 'Downloading course JSON', fatal=False) or {}`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`playlist_id = course.get('id')`
			`if playlist_id:`
			`playlist_id = compat_str(playlist_id)`
[egghead:course] Improve (closes #13370) 2017-07-09 17:28:42 +07:00
			`return self.playlist_result(`
			`entries, playlist_id, course.get('title'),`
			`course.get('description'))`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00

			`class EggheadLessonIE(InfoExtractor):`
			`IE_DESC = 'egghead.io lesson'`
			`IE_NAME = 'egghead:lesson'`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'`
			`_TESTS = [{`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',`
			`'info_dict': {`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'id': '1196',`
			`'display_id': 'javascript-linear-data-flow-with-container-style-types-box',`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`'ext': 'mp4',`
			`'title': 'Create linear data flow with container style types (Box)',`
			`'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',`
			`'thumbnail': r're:^https?:.*\.jpg$',`
			`'timestamp': 1481296768,`
			`'upload_date': '20161209',`
			`'duration': 304,`
			`'view_count': 0,`
			`'tags': ['javascript', 'free'],`
			`},`
			`'params': {`
			`'skip_download': True,`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'format': 'bestvideo',`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`},`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`}, {`
			`'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',`
			`'only_matching': True,`
			`}]`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00
			`def _real_extract(self, url):`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`display_id = self._match_id(url)`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00
			`lesson = self._download_json(`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)`

			`lesson_id = compat_str(lesson['id'])`
			`title = lesson['title']`

			`formats = []`
			`for _, format_url in lesson['media_urls'].items():`
Improve URL extraction 2018-07-21 19:08:28 +07:00			`format_url = url_or_none(format_url)`
			`if not format_url:`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`continue`
			`ext = determine_ext(format_url)`
			`if ext == 'm3u8':`
			`formats.extend(self._extract_m3u8_formats(`
			`format_url, lesson_id, 'mp4', entry_protocol='m3u8',`
			`m3u8_id='hls', fatal=False))`
			`elif ext == 'mpd':`
			`formats.extend(self._extract_mpd_formats(`
			`format_url, lesson_id, mpd_id='dash', fatal=False))`
			`else:`
			`formats.append({`
			`'url': format_url,`
			`})`
			`self._sort_formats(formats)`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00
			`return {`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'id': lesson_id,`
			`'display_id': display_id,`
			`'title': title,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`'description': lesson.get('summary'),`
			`'thumbnail': lesson.get('thumb_nail'),`
			`'timestamp': unified_timestamp(lesson.get('published_at')),`
			`'duration': int_or_none(lesson.get('duration')),`
			`'view_count': int_or_none(lesson.get('plays_count')),`
			`'tags': try_get(lesson, lambda x: x['tag_list'], list),`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'series': try_get(`
			`lesson, lambda x: x['series']['title'], compat_str),`
			`'formats': formats,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`}`