yt-dlp/yt_dlp/extractor/egghead.py

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
)


class EggheadBaseIE(InfoExtractor):
    """Common base for the egghead.io extractors; holds the API request helper."""

    def _call_api(self, path, video_id, resource, fatal=True):
        """Download JSON from the egghead.io v1 API.

        path -- endpoint path appended to the API root URL
        video_id -- identifier handed to _download_json alongside the request
        resource -- label interpolated into the 'Downloading ... JSON' note
        fatal -- forwarded unchanged to _download_json

        Returns whatever _download_json returns for the request.
        """
        api_url = 'https://app.egghead.io/api/v1/' + path
        note = f'Downloading {resource} JSON'
        return self._download_json(api_url, video_id, note, fatal=fatal)


class EggheadCourseIE(EggheadBaseIE):
    """Extractor for egghead.io course/playlist pages.

    Builds a playlist whose entries are deferred to EggheadLessonIE.
    """

    IE_DESC = 'egghead.io course'
    IE_NAME = 'egghead:course'
    _VALID_URL = r'https?://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
        'playlist_count': 29,
        'info_dict': {
            'id': '432655',
            'title': 'Professor Frisby Introduces Composable Functional JavaScript',
            'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
        },
    }, {
        'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result for the course addressed by ``url``.

        The lesson list is requested fatally; the course metadata request is
        non-fatal, so title and description may be None. When the metadata
        (or its ``id``) is unavailable, the URL slug is used as the playlist
        id instead of leaving the id empty.
        """
        slug = self._match_id(url)
        series_path = 'series/' + slug
        lessons = self._call_api(
            series_path + '/lessons', slug, 'course lessons')

        entries = []
        for lesson in lessons:
            lesson_url = url_or_none(lesson.get('http_url'))
            if not lesson_url:
                # Without a usable URL there is nothing to hand to the lesson extractor.
                continue
            lesson_id = lesson.get('id')
            if lesson_id:
                lesson_id = str(lesson_id)
            entries.append(self.url_result(
                lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))

        # Non-fatal request: `or {}` keeps the .get() lookups below safe when
        # the call yields a falsy result.
        course = self._call_api(
            series_path, slug, 'course', False) or {}

        # Use the course's own id when present; otherwise keep the slug so the
        # playlist is never returned with a None id just because the optional
        # metadata request failed.
        course_id = course.get('id')
        playlist_id = str(course_id) if course_id else slug

        return self.playlist_result(
            entries, playlist_id, course.get('title'),
            course.get('description'))


class EggheadLessonIE(EggheadBaseIE):
    """Extractor for a single egghead.io lesson (site or API URL)."""

    IE_DESC = 'egghead.io lesson'
    IE_NAME = 'egghead:lesson'
    _VALID_URL = r'https?://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
        'info_dict': {
            'id': '1196',
            'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
            'ext': 'mp4',
            'title': 'Create linear data flow with container style types (Box)',
            'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
            'thumbnail': r're:^https?:.*\.jpg$',
            'timestamp': 1481296768,
            'upload_date': '20161209',
            'duration': 304,
            'view_count': 0,
            'tags': 'count:2',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
        'only_matching': True,
    }, {
        'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the lesson addressed by ``url``.

        ``id`` and ``title`` are required and read with hard lookups; all
        other metadata is optional. Formats are built from the values of the
        lesson's ``media_urls`` mapping: HLS and DASH manifests are expanded
        non-fatally, anything else is added as a direct URL.
        """
        display_id = self._match_id(url)

        lesson = self._call_api(
            'lessons/' + display_id, display_id, 'lesson')

        lesson_id = str(lesson['id'])
        title = lesson['title']

        # `media_urls` may be absent or not a mapping; treat that as "no
        # media" and return an empty formats list rather than raising a bare
        # KeyError/AttributeError here.
        media_urls = lesson.get('media_urls')
        if not isinstance(media_urls, dict):
            media_urls = {}

        formats = []
        # Only the URLs matter; the mapping keys are not used.
        for format_url in media_urls.values():
            format_url = url_or_none(format_url)
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, lesson_id, 'mp4', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    format_url, lesson_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': format_url,
                })

        return {
            'id': lesson_id,
            'display_id': display_id,
            'title': title,
            'description': lesson.get('summary'),
            'thumbnail': lesson.get('thumb_nail'),
            'timestamp': unified_timestamp(lesson.get('published_at')),
            'duration': int_or_none(lesson.get('duration')),
            'view_count': int_or_none(lesson.get('plays_count')),
            'tags': try_get(lesson, lambda x: x['tag_list'], list),
            'series': try_get(
                lesson, lambda x: x['series']['title'], str),
            'formats': formats,
        }
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00			`from .common import InfoExtractor`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`from ..utils import (`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`determine_ext,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`int_or_none,`
			`try_get,`
			`unified_timestamp,`
Improve URL extraction 2018-07-21 19:08:28 +07:00			`url_or_none,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`)`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00

Update to ytdl-2021.02.04.1 except youtube 2021-02-04 13:26:01 +05:30			`class EggheadBaseIE(InfoExtractor):`
			`def _call_api(self, path, video_id, resource, fatal=True):`
			`return self._download_json(`
			`'https://app.egghead.io/api/v1/' + path,`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`video_id, f'Downloading {resource} JSON', fatal=fatal)`
Update to ytdl-2021.02.04.1 except youtube 2021-02-04 13:26:01 +05:30

			`class EggheadCourseIE(EggheadBaseIE):`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00			`IE_DESC = 'egghead.io course'`
			`IE_NAME = 'egghead:course'`
[cleanup, ie] Match both `http` and `https` in `_VALID_URL` (#8968) Except for Vimeo, since that causes matching collisions. Authored by: seproDev 2024-02-01 19:38:42 +01:00			`_VALID_URL = r'https?://(?:app\.)?egghead\.io/(?:course\|playlist)s/(?P<id>[^/?#&]+)'`
Update to ytdl-commit-379f52a [liveleak] Remove extractor https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961 2021-06-23 06:56:37 +05:30			`_TESTS = [{`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00			`'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',`
			`'playlist_count': 29,`
			`'info_dict': {`
Update to ytdl-commit-379f52a [liveleak] Remove extractor https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961 2021-06-23 06:56:37 +05:30			`'id': '432655',`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00			`'title': 'Professor Frisby Introduces Composable Functional JavaScript',`
			`'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',`
			`},`
Update to ytdl-commit-379f52a [liveleak] Remove extractor https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961 2021-06-23 06:56:37 +05:30			`}, {`
			`'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',`
			`'only_matching': True,`
			`}]`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00
			`def _real_extract(self, url):`
			`playlist_id = self._match_id(url)`
Update to ytdl-2021.02.04.1 except youtube 2021-02-04 13:26:01 +05:30			`series_path = 'series/' + playlist_id`
			`lessons = self._call_api(`
			`series_path + '/lessons', playlist_id, 'course lessons')`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00
			`entries = []`
			`for lesson in lessons:`
Improve URL extraction 2018-07-21 19:08:28 +07:00			`lesson_url = url_or_none(lesson.get('http_url'))`
			`if not lesson_url:`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`continue`
			`lesson_id = lesson.get('id')`
			`if lesson_id:`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`lesson_id = str(lesson_id)`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`entries.append(self.url_result(`
			`lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))`

Update to ytdl-2021.02.04.1 except youtube 2021-02-04 13:26:01 +05:30			`course = self._call_api(`
			`series_path, playlist_id, 'course', False) or {}`
[egghead:course] Add support for egghead.io course playlists Individual egghead videos are already handled by the generic/Wistia extractors. 2017-01-09 11:24:40 +01:00
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`playlist_id = course.get('id')`
			`if playlist_id:`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`playlist_id = str(playlist_id)`
[egghead:course] Improve (closes #13370) 2017-07-09 17:28:42 +07:00
			`return self.playlist_result(`
			`entries, playlist_id, course.get('title'),`
			`course.get('description'))`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00

Update to ytdl-2021.02.04.1 except youtube 2021-02-04 13:26:01 +05:30			`class EggheadLessonIE(EggheadBaseIE):`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`IE_DESC = 'egghead.io lesson'`
			`IE_NAME = 'egghead:lesson'`
[cleanup, ie] Match both `http` and `https` in `_VALID_URL` (#8968) Except for Vimeo, since that causes matching collisions. Authored by: seproDev 2024-02-01 19:38:42 +01:00			`_VALID_URL = r'https?://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`_TESTS = [{`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',`
			`'info_dict': {`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'id': '1196',`
			`'display_id': 'javascript-linear-data-flow-with-container-style-types-box',`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`'ext': 'mp4',`
			`'title': 'Create linear data flow with container style types (Box)',`
			`'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',`
			`'thumbnail': r're:^https?:.*\.jpg$',`
			`'timestamp': 1481296768,`
			`'upload_date': '20161209',`
			`'duration': 304,`
			`'view_count': 0,`
Update to ytdl-2021.02.04.1 except youtube 2021-02-04 13:26:01 +05:30			`'tags': 'count:2',`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`},`
			`'params': {`
			`'skip_download': True,`
			`},`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`}, {`
			`'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',`
			`'only_matching': True,`
Update to ytdl-commit-379f52a [liveleak] Remove extractor https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961 2021-06-23 06:56:37 +05:30			`}, {`
			`'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',`
			`'only_matching': True,`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`}]`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00
			`def _real_extract(self, url):`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`display_id = self._match_id(url)`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00
Update to ytdl-2021.02.04.1 except youtube 2021-02-04 13:26:01 +05:30			`lesson = self._call_api(`
			`'lessons/' + display_id, display_id, 'lesson')`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`lesson_id = str(lesson['id'])`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`title = lesson['title']`

			`formats = []`
			`for _, format_url in lesson['media_urls'].items():`
Improve URL extraction 2018-07-21 19:08:28 +07:00			`format_url = url_or_none(format_url)`
			`if not format_url:`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`continue`
			`ext = determine_ext(format_url)`
			`if ext == 'm3u8':`
			`formats.extend(self._extract_m3u8_formats(`
[extractor] Always prefer native hls downloader by default When the manifest is not downloadable by native downloader, it already is able to detect it and switch to `ffmpeg`. So there doesn't seem to be a reason anymore to use ffmpeg as the preferred downloader 2021-05-22 23:58:11 +05:30			`format_url, lesson_id, 'mp4', m3u8_id='hls', fatal=False))`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`elif ext == 'mpd':`
			`formats.extend(self._extract_mpd_formats(`
			`format_url, lesson_id, mpd_id='dash', fatal=False))`
			`else:`
			`formats.append({`
			`'url': format_url,`
			`})`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00
			`return {`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'id': lesson_id,`
			`'display_id': display_id,`
			`'title': title,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`'description': lesson.get('summary'),`
			`'thumbnail': lesson.get('thumb_nail'),`
			`'timestamp': unified_timestamp(lesson.get('published_at')),`
			`'duration': int_or_none(lesson.get('duration')),`
			`'view_count': int_or_none(lesson.get('plays_count')),`
			`'tags': try_get(lesson, lambda x: x['tag_list'], list),`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'series': try_get(`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`lesson, lambda x: x['series']['title'], str),`
[egghead] Fix extraction (closes #14388) 2017-10-29 07:11:37 +07:00			`'formats': formats,`
[egghead:lesson] Add extractor (#6635) 2017-07-20 23:22:36 +07:00			`}`