mirror of https://github.com/yt-dlp/yt-dlp.git
[crunchyroll] parse vilos media data (closes #17343)
This commit is contained in:
parent
ed6919e737
commit
54a5be4dba
|
@ -7,7 +7,7 @@ import zlib
|
||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
from math import pow, sqrt, floor
|
from math import pow, sqrt, floor
|
||||||
from .common import InfoExtractor
|
from .vrv import VRVIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
|
@ -18,6 +18,8 @@ from ..compat import (
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
bytes_to_intlist,
|
bytes_to_intlist,
|
||||||
|
extract_attributes,
|
||||||
|
float_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
@ -26,14 +28,13 @@ from ..utils import (
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
extract_attributes,
|
|
||||||
)
|
)
|
||||||
from ..aes import (
|
from ..aes import (
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CrunchyrollBaseIE(InfoExtractor):
|
class CrunchyrollBaseIE(VRVIE):
|
||||||
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
_LOGIN_URL = 'https://www.crunchyroll.com/login'
|
||||||
_LOGIN_FORM = 'login_form'
|
_LOGIN_FORM = 'login_form'
|
||||||
_NETRC_MACHINE = 'crunchyroll'
|
_NETRC_MACHINE = 'crunchyroll'
|
||||||
|
@ -148,7 +149,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
|
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
|
||||||
'upload_date': '20131013',
|
'upload_date': '20131013',
|
||||||
'url': 're:(?!.*&)',
|
'url': 're:(?!.*&)',
|
||||||
|
@ -221,7 +222,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '535080',
|
'id': '535080',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
|
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
||||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||||
'uploader': 'Marvelous AQL Inc.',
|
'uploader': 'Marvelous AQL Inc.',
|
||||||
'upload_date': '20091021',
|
'upload_date': '20091021',
|
||||||
|
@ -437,13 +438,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
|
|
||||||
|
media = self._parse_json(self._search_regex(
|
||||||
|
r'vilos\.config\.media\s*=\s*({.+?});',
|
||||||
|
webpage, 'vilos media', default='{}'), video_id)
|
||||||
|
media_metadata = media.get('metadata') or {}
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||||
webpage, 'video_title')
|
webpage, 'video_title')
|
||||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||||
video_description = self._parse_json(self._html_search_regex(
|
video_description = (self._parse_json(self._html_search_regex(
|
||||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||||
webpage, 'description', default='{}'), video_id).get('description')
|
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||||
if video_description:
|
if video_description:
|
||||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||||
video_upload_date = self._html_search_regex(
|
video_upload_date = self._html_search_regex(
|
||||||
|
@ -456,91 +462,99 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||||
webpage, 'video_uploader', fatal=False)
|
webpage, 'video_uploader', fatal=False)
|
||||||
|
|
||||||
available_fmts = []
|
|
||||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
|
||||||
attrs = extract_attributes(a)
|
|
||||||
href = attrs.get('href')
|
|
||||||
if href and '/freetrial' in href:
|
|
||||||
continue
|
|
||||||
available_fmts.append(fmt)
|
|
||||||
if not available_fmts:
|
|
||||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
|
||||||
available_fmts = re.findall(p, webpage)
|
|
||||||
if available_fmts:
|
|
||||||
break
|
|
||||||
video_encode_ids = []
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt in available_fmts:
|
for stream in media.get('streams', []):
|
||||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
formats.extend(self._extract_vrv_formats(
|
||||||
video_format = fmt + 'p'
|
stream.get('url'), video_id, stream.get('format'),
|
||||||
stream_infos = []
|
stream.get('audio_lang'), stream.get('hardsub_lang')))
|
||||||
streamdata = self._call_rpc_api(
|
if not formats:
|
||||||
'VideoPlayer_GetStandardConfig', video_id,
|
available_fmts = []
|
||||||
'Downloading media info for %s' % video_format, data={
|
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||||
'media_id': video_id,
|
attrs = extract_attributes(a)
|
||||||
'video_format': stream_format,
|
href = attrs.get('href')
|
||||||
'video_quality': stream_quality,
|
if href and '/freetrial' in href:
|
||||||
'current_page': url,
|
continue
|
||||||
})
|
available_fmts.append(fmt)
|
||||||
if streamdata is not None:
|
if not available_fmts:
|
||||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||||
|
available_fmts = re.findall(p, webpage)
|
||||||
|
if available_fmts:
|
||||||
|
break
|
||||||
|
if not available_fmts:
|
||||||
|
available_fmts = self._FORMAT_IDS.keys()
|
||||||
|
video_encode_ids = []
|
||||||
|
|
||||||
|
for fmt in available_fmts:
|
||||||
|
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||||
|
video_format = fmt + 'p'
|
||||||
|
stream_infos = []
|
||||||
|
streamdata = self._call_rpc_api(
|
||||||
|
'VideoPlayer_GetStandardConfig', video_id,
|
||||||
|
'Downloading media info for %s' % video_format, data={
|
||||||
|
'media_id': video_id,
|
||||||
|
'video_format': stream_format,
|
||||||
|
'video_quality': stream_quality,
|
||||||
|
'current_page': url,
|
||||||
|
})
|
||||||
|
if streamdata is not None:
|
||||||
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
|
if stream_info is not None:
|
||||||
|
stream_infos.append(stream_info)
|
||||||
|
stream_info = self._call_rpc_api(
|
||||||
|
'VideoEncode_GetStreamInfo', video_id,
|
||||||
|
'Downloading stream info for %s' % video_format, data={
|
||||||
|
'media_id': video_id,
|
||||||
|
'video_format': stream_format,
|
||||||
|
'video_encode_quality': stream_quality,
|
||||||
|
})
|
||||||
if stream_info is not None:
|
if stream_info is not None:
|
||||||
stream_infos.append(stream_info)
|
stream_infos.append(stream_info)
|
||||||
stream_info = self._call_rpc_api(
|
for stream_info in stream_infos:
|
||||||
'VideoEncode_GetStreamInfo', video_id,
|
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||||
'Downloading stream info for %s' % video_format, data={
|
if video_encode_id in video_encode_ids:
|
||||||
'media_id': video_id,
|
continue
|
||||||
'video_format': stream_format,
|
video_encode_ids.append(video_encode_id)
|
||||||
'video_encode_quality': stream_quality,
|
|
||||||
})
|
|
||||||
if stream_info is not None:
|
|
||||||
stream_infos.append(stream_info)
|
|
||||||
for stream_info in stream_infos:
|
|
||||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
|
||||||
if video_encode_id in video_encode_ids:
|
|
||||||
continue
|
|
||||||
video_encode_ids.append(video_encode_id)
|
|
||||||
|
|
||||||
video_file = xpath_text(stream_info, './file')
|
video_file = xpath_text(stream_info, './file')
|
||||||
if not video_file:
|
if not video_file:
|
||||||
continue
|
continue
|
||||||
if video_file.startswith('http'):
|
if video_file.startswith('http'):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
continue
|
|
||||||
|
|
||||||
video_url = xpath_text(stream_info, './host')
|
|
||||||
if not video_url:
|
|
||||||
continue
|
|
||||||
metadata = stream_info.find('./metadata')
|
|
||||||
format_info = {
|
|
||||||
'format': video_format,
|
|
||||||
'height': int_or_none(xpath_text(metadata, './height')),
|
|
||||||
'width': int_or_none(xpath_text(metadata, './width')),
|
|
||||||
}
|
|
||||||
|
|
||||||
if '.fplive.net/' in video_url:
|
|
||||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
|
||||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
|
||||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
|
||||||
netloc='v.lvlt.crcdn.net',
|
|
||||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
|
||||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
|
||||||
format_info.update({
|
|
||||||
'format_id': 'http-' + video_format,
|
|
||||||
'url': direct_video_url,
|
|
||||||
})
|
|
||||||
formats.append(format_info)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
format_info.update({
|
video_url = xpath_text(stream_info, './host')
|
||||||
'format_id': 'rtmp-' + video_format,
|
if not video_url:
|
||||||
'url': video_url,
|
continue
|
||||||
'play_path': video_file,
|
metadata = stream_info.find('./metadata')
|
||||||
'ext': 'flv',
|
format_info = {
|
||||||
})
|
'format': video_format,
|
||||||
formats.append(format_info)
|
'height': int_or_none(xpath_text(metadata, './height')),
|
||||||
|
'width': int_or_none(xpath_text(metadata, './width')),
|
||||||
|
}
|
||||||
|
|
||||||
|
if '.fplive.net/' in video_url:
|
||||||
|
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||||
|
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||||
|
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||||
|
netloc='v.lvlt.crcdn.net',
|
||||||
|
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||||
|
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||||
|
format_info.update({
|
||||||
|
'format_id': 'http-' + video_format,
|
||||||
|
'url': direct_video_url,
|
||||||
|
})
|
||||||
|
formats.append(format_info)
|
||||||
|
continue
|
||||||
|
|
||||||
|
format_info.update({
|
||||||
|
'format_id': 'rtmp-' + video_format,
|
||||||
|
'url': video_url,
|
||||||
|
'play_path': video_file,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
formats.append(format_info)
|
||||||
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
|
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
|
||||||
|
|
||||||
metadata = self._call_rpc_api(
|
metadata = self._call_rpc_api(
|
||||||
|
@ -549,7 +563,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
'media_id': video_id,
|
'media_id': video_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
subtitles = self.extract_subtitles(video_id, webpage)
|
subtitles = {}
|
||||||
|
for subtitle in media.get('subtitles', []):
|
||||||
|
subtitle_url = subtitle.get('url')
|
||||||
|
if not subtitle_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
|
||||||
|
'url': subtitle_url,
|
||||||
|
'ext': subtitle.get('format', 'ass'),
|
||||||
|
})
|
||||||
|
if not subtitles:
|
||||||
|
subtitles = self.extract_subtitles(video_id, webpage)
|
||||||
|
|
||||||
# webpage provide more accurate data than series_title from XML
|
# webpage provide more accurate data than series_title from XML
|
||||||
series = self._html_search_regex(
|
series = self._html_search_regex(
|
||||||
|
@ -557,8 +581,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
webpage, 'series', fatal=False)
|
webpage, 'series', fatal=False)
|
||||||
season = xpath_text(metadata, 'series_title')
|
season = xpath_text(metadata, 'series_title')
|
||||||
|
|
||||||
episode = xpath_text(metadata, 'episode_title')
|
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
|
||||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
|
||||||
|
|
||||||
season_number = int_or_none(self._search_regex(
|
season_number = int_or_none(self._search_regex(
|
||||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||||
|
@ -568,7 +592,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
'duration': float_or_none(media_metadata.get('duration'), 1000),
|
||||||
|
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'series': series,
|
'series': series,
|
||||||
|
|
|
@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor):
|
||||||
class VRVIE(VRVBaseIE):
|
class VRVIE(VRVBaseIE):
|
||||||
IE_NAME = 'vrv'
|
IE_NAME = 'vrv'
|
||||||
_VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
|
'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'GR9PNZ396',
|
'id': 'GR9PNZ396',
|
||||||
|
@ -85,7 +85,28 @@ class VRVIE(VRVBaseIE):
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
|
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
    """Extract the adaptive formats described by a single stream entry.

    Parameters:
        url:           manifest URL of the stream; a falsy value yields [].
        video_id:      id forwarded to the manifest extraction helpers.
        stream_format: 'hls' or 'dash'; any other value yields [].
        audio_lang:    audio locale of the stream, or None.
        hardsub_lang:  locale of burned-in subtitles, or None.

    Returns the list produced by self._extract_m3u8_formats (for 'hls')
    or self._extract_mpd_formats (for 'dash').  Both are called with
    fatal=False -- presumably they then return an empty list on failure;
    confirm against the helpers.  When audio_lang is set, every returned
    format whose 'acodec' is not 'none' gets its 'language' set to it.
    """
    if not url or stream_format not in ('hls', 'dash'):
        return []

    # The hardsub locale takes precedence over the audio locale as the
    # distinguishing part of the format id.
    stream_id = hardsub_lang or audio_lang
    if stream_id:
        format_id = '%s-%s' % (stream_format, stream_id)
        label = stream_id
    else:
        # Neither locale is known: fall back to the bare protocol name so
        # that a literal "None" does not end up in the format id
        # ("hls-None") or in the progress note.
        format_id = label = stream_format

    if stream_format == 'hls':
        adaptive_formats = self._extract_m3u8_formats(
            url, video_id, 'mp4', m3u8_id=format_id,
            note='Downloading %s m3u8 information' % label,
            fatal=False)
    else:
        # Only 'dash' can reach this branch because of the guard above.
        adaptive_formats = self._extract_mpd_formats(
            url, video_id, mpd_id=format_id,
            note='Downloading %s MPD information' % label,
            fatal=False)

    if audio_lang:
        # Tag only the formats that carry audio; entries whose acodec is
        # 'none' are left without a language.
        for f in adaptive_formats:
            if f.get('acodec') != 'none':
                f['language'] = audio_lang
    return adaptive_formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -115,26 +136,9 @@ class VRVIE(VRVBaseIE):
|
||||||
for stream_type, streams in streams_json.get('streams', {}).items():
|
for stream_type, streams in streams_json.get('streams', {}).items():
|
||||||
if stream_type in ('adaptive_hls', 'adaptive_dash'):
|
if stream_type in ('adaptive_hls', 'adaptive_dash'):
|
||||||
for stream in streams.values():
|
for stream in streams.values():
|
||||||
stream_url = stream.get('url')
|
formats.extend(self._extract_vrv_formats(
|
||||||
if not stream_url:
|
stream.get('url'), video_id, stream_type.split('_')[1],
|
||||||
continue
|
audio_locale, stream.get('hardsub_locale')))
|
||||||
stream_id = stream.get('hardsub_locale') or audio_locale
|
|
||||||
format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
|
|
||||||
if stream_type == 'adaptive_hls':
|
|
||||||
adaptive_formats = self._extract_m3u8_formats(
|
|
||||||
stream_url, video_id, 'mp4', m3u8_id=format_id,
|
|
||||||
note='Downloading %s m3u8 information' % stream_id,
|
|
||||||
fatal=False)
|
|
||||||
else:
|
|
||||||
adaptive_formats = self._extract_mpd_formats(
|
|
||||||
stream_url, video_id, mpd_id=format_id,
|
|
||||||
note='Downloading %s MPD information' % stream_id,
|
|
||||||
fatal=False)
|
|
||||||
if audio_locale:
|
|
||||||
for f in adaptive_formats:
|
|
||||||
if f.get('acodec') != 'none':
|
|
||||||
f['language'] = audio_locale
|
|
||||||
formats.extend(adaptive_formats)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
|
Loading…
Reference in New Issue