[extractor/odkmedia] Add `OnDemandChinaEpisodeIE` (#6116)

Authored by: HobbyistDev, pukkandan
This commit is contained in:
HobbyistDev 2023-02-17 12:00:07 +09:00 committed by GitHub
parent 72671a212d
commit 10fd9e6ee8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 106 additions and 0 deletions

View File

@ -1292,6 +1292,7 @@ from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE from .nzz import NZZIE
from .odatv import OdaTVIE from .odatv import OdaTVIE
from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .oftv import ( from .oftv import (
OfTVIE, OfTVIE,

View File

@ -0,0 +1,105 @@
import json
import urllib.error
from .common import InfoExtractor
from ..utils import (
ExtractorError,
GeoRestrictedError,
float_or_none,
traverse_obj,
try_call
)
class OnDemandChinaEpisodeIE(InfoExtractor):
_VALID_URL = r'https?://www\.ondemandchina\.com/\w+/watch/(?P<series>[\w-]+)/(?P<id>ep-(?P<ep>\d+))'
_TESTS = [{
'url': 'https://www.ondemandchina.com/en/watch/together-against-covid-19/ep-1',
'info_dict': {
'id': '264394',
'ext': 'mp4',
'duration': 3256.88,
'title': 'EP 1 The Calling',
'alt_title': '第1集 令出如山',
'thumbnail': 'https://d2y2efdi5wgkcl.cloudfront.net/fit-in/256x256/media-io/2020/9/11/image.d9816e81.jpg',
'description': '疫情严峻,党政军民学、东西南北中协同应考',
'tags': ['Social Humanities', 'Documentary', 'Medical', 'Social'],
}
}]
_QUERY = '''
query Episode($programSlug: String!, $episodeNumber: Int!) {
episode(
programSlug: $programSlug
episodeNumber: $episodeNumber
kind: "series"
part: null
) {
id
title
titleEn
titleKo
titleZhHans
titleZhHant
synopsis
synopsisEn
synopsisKo
synopsisZhHans
synopsisZhHant
videoDuration
images {
thumbnail
}
}
}'''
def _real_extract(self, url):
program_slug, display_id, ep_number = self._match_valid_url(url).group('series', 'id', 'ep')
webpage = self._download_webpage(url, display_id)
video_info = self._download_json(
'https://odc-graphql.odkmedia.io/graphql', display_id,
headers={'Content-type': 'application/json'},
data=json.dumps({
'operationName': 'Episode',
'query': self._QUERY,
'variables': {
'programSlug': program_slug,
'episodeNumber': int(ep_number),
},
}).encode())['data']['episode']
try:
source_json = self._download_json(
f'https://odkmedia.io/odc/api/v2/playback/{video_info["id"]}/', display_id,
headers={'Authorization': '', 'service-name': 'odc'})
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError):
error_data = self._parse_json(e.cause.read(), display_id)['detail']
raise GeoRestrictedError(error_data)
formats, subtitles = [], {}
for source in traverse_obj(source_json, ('sources', ...)):
if source.get('type') == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('url'), display_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
self.report_warning(f'Unsupported format {source.get("type")}', display_id)
return {
'id': str(video_info['id']),
'duration': float_or_none(video_info.get('videoDuration'), 1000),
'thumbnail': (traverse_obj(video_info, ('images', 'thumbnail'))
or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
'title': (traverse_obj(video_info, 'title', 'titleEn')
or self._html_search_meta(['og:title', 'twitter:title'], webpage)
or self._html_extract_title(webpage)),
'alt_title': traverse_obj(video_info, 'titleKo', 'titleZhHans', 'titleZhHant'),
'description': (traverse_obj(
video_info, 'synopsisEn', 'synopsisKo', 'synopsisZhHans', 'synopsisZhHant', 'synopisis')
or self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)),
'formats': formats,
'subtitles': subtitles,
'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', '))
}