mirror of https://github.com/yt-dlp/yt-dlp.git
[jukebox] remove extractor and handle it using generic extractor
This commit is contained in:
parent
0940c5b4c6
commit
6aeba407db
|
@ -262,7 +262,6 @@ from .izlesene import IzleseneIE
|
||||||
from .jadorecettepub import JadoreCettePubIE
|
from .jadorecettepub import JadoreCettePubIE
|
||||||
from .jeuxvideo import JeuxVideoIE
|
from .jeuxvideo import JeuxVideoIE
|
||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
from .jukebox import JukeboxIE
|
|
||||||
from .jpopsukitv import JpopsukiIE
|
from .jpopsukitv import JpopsukiIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .kanalplay import KanalPlayIE
|
from .kanalplay import KanalPlayIE
|
||||||
|
|
|
@ -50,6 +50,7 @@ from .dailymotion import DailymotionCloudIE
|
||||||
from .onionstudios import OnionStudiosIE
|
from .onionstudios import OnionStudiosIE
|
||||||
from .snagfilms import SnagFilmsEmbedIE
|
from .snagfilms import SnagFilmsEmbedIE
|
||||||
from .screenwavemedia import ScreenwaveMediaIE
|
from .screenwavemedia import ScreenwaveMediaIE
|
||||||
|
from .ultimedia import UltimediaIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -1029,6 +1030,21 @@ class GenericIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'cinemasnob',
|
'title': 'cinemasnob',
|
||||||
},
|
},
|
||||||
|
},
|
||||||
|
# Ultimedia embed
|
||||||
|
{
|
||||||
|
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
||||||
|
'md5': '25551df6e7c7ab8096ceeeae048c5f64',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'r303r',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Kosheen - Pride (live)',
|
||||||
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 293,
|
||||||
|
'upload_date': '20081103',
|
||||||
|
'timestamp': 1225733392,
|
||||||
|
'uploader_id': '33m03',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1751,6 +1767,11 @@ class GenericIE(InfoExtractor):
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
|
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
|
||||||
|
|
||||||
|
# Look for Ultimedia embeds
|
||||||
|
ultimedia_url = UltimediaIE._extract_url(webpage)
|
||||||
|
if ultimedia_url:
|
||||||
|
return self.url_result(self._proto_relative_url(ultimedia_url), 'Ultimedia')
|
||||||
|
|
||||||
# Look for AdobeTVVideo embeds
|
# Look for AdobeTVVideo embeds
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||||
|
|
|
@ -1,59 +0,0 @@
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
RegexNotFoundError,
|
|
||||||
unescapeHTML,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JukeboxIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<id>[a-z0-9\-]+)\.html'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.jukebox.es/kosheen/videoclip,pride,r303r.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'r303r',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Kosheen-En Vivo Pride',
|
|
||||||
'uploader': 'Kosheen',
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
html = self._download_webpage(url, video_id)
|
|
||||||
iframe_url = unescapeHTML(self._search_regex(r'<iframe .*src="([^"]*)"', html, 'iframe url'))
|
|
||||||
|
|
||||||
iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
|
|
||||||
if re.search(r'class="jkb_waiting"', iframe_html) is not None:
|
|
||||||
raise ExtractorError('Video is not available(in your country?)!')
|
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
|
||||||
|
|
||||||
try:
|
|
||||||
video_url = self._search_regex(r'"config":{"file":"(?P<video_url>http:[^"]+\?mdtk=[0-9]+)"',
|
|
||||||
iframe_html, 'video url')
|
|
||||||
video_url = unescapeHTML(video_url).replace('\/', '/')
|
|
||||||
except RegexNotFoundError:
|
|
||||||
youtube_url = self._search_regex(
|
|
||||||
r'config":{"file":"(http:\\/\\/www\.youtube\.com\\/watch\?v=[^"]+)"',
|
|
||||||
iframe_html, 'youtube url')
|
|
||||||
youtube_url = unescapeHTML(youtube_url).replace('\/', '/')
|
|
||||||
self.to_screen('Youtube video detected')
|
|
||||||
return self.url_result(youtube_url, ie='Youtube')
|
|
||||||
|
|
||||||
title = self._html_search_regex(r'<h1 class="inline">([^<]+)</h1>',
|
|
||||||
html, 'title')
|
|
||||||
artist = self._html_search_regex(r'<span id="infos_article_artist">([^<]+)</span>',
|
|
||||||
html, 'artist')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': artist + '-' + title,
|
|
||||||
'uploader': artist,
|
|
||||||
}
|
|
|
@ -4,102 +4,83 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlparse
|
from ..utils import int_or_none
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
qualities,
|
|
||||||
unified_strdate,
|
|
||||||
clean_html,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class UltimediaIE(InfoExtractor):
|
class UltimediaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\d+a-z]+)'
|
_VALID_URL = r'https?://(?:www\.)?ultimedia\.com/deliver/(?P<type>generic|musique)(?:/[^/]+)*/(?:src|article)/(?P<id>[\d+a-z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# news
|
# news
|
||||||
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
|
'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01601930/zone/1/src/s8uk0r/autoplay/yes/ad/no/width/714/height/435',
|
||||||
'md5': '276a0e49de58c7e85d32b057837952a2',
|
'md5': '276a0e49de58c7e85d32b057837952a2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 's8uk0r',
|
'id': 's8uk0r',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
|
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
|
||||||
'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 74,
|
||||||
'upload_date': '20150317',
|
'upload_date': '20150317',
|
||||||
|
'timestamp': 1426604939,
|
||||||
|
'uploader_id': '3fszv',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# music
|
# music
|
||||||
'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
|
'url': 'https://www.ultimedia.com/deliver/musique/iframe/mdtk/01601930/zone/1/article/xvpfp8/autoplay/yes/ad/no/width/714/height/435',
|
||||||
'md5': '2ea3513813cf230605c7e2ffe7eca61c',
|
'md5': '2ea3513813cf230605c7e2ffe7eca61c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'xvpfp8',
|
'id': 'xvpfp8',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Two - C'est la vie (Clip)",
|
'title': 'Two - C\'est La Vie (clip)',
|
||||||
'description': 'Two',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg',
|
'thumbnail': 're:^https?://.*\.jpg',
|
||||||
|
'duration': 233,
|
||||||
'upload_date': '20150224',
|
'upload_date': '20150224',
|
||||||
|
'timestamp': 1424760500,
|
||||||
|
'uploader_id': '3rfzk',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
mobj = re.search(
|
||||||
|
r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)',
|
||||||
|
webpage)
|
||||||
|
if mobj:
|
||||||
|
return mobj.group('url')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
deliver_url = self._proto_relative_url(self._search_regex(
|
deliver_info = self._download_json(
|
||||||
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
|
'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type),
|
||||||
webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':')
|
|
||||||
|
|
||||||
deliver_page = self._download_webpage(
|
|
||||||
deliver_url, video_id, 'Downloading iframe page')
|
|
||||||
|
|
||||||
if '>This video is currently not available' in deliver_page:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Video %s is currently not available' % video_id, expected=True)
|
|
||||||
|
|
||||||
player = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on",
|
|
||||||
deliver_page, 'player'),
|
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
quality = qualities(['flash', 'html5'])
|
yt_id = deliver_info.get('yt_id')
|
||||||
|
if yt_id:
|
||||||
|
return self.url_result(yt_id, 'Youtube')
|
||||||
|
|
||||||
|
jwconf = deliver_info['jwconf']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for mode in player['modes']:
|
for source in jwconf['playlist'][0]['sources']:
|
||||||
video_url = mode.get('config', {}).get('file')
|
|
||||||
if not video_url:
|
|
||||||
continue
|
|
||||||
if re.match(r'https?://www\.youtube\.com/.+?', video_url):
|
|
||||||
return self.url_result(video_url, 'Youtube')
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': source['file'],
|
||||||
'format_id': mode.get('type'),
|
'format_id': source.get('label'),
|
||||||
'quality': quality(mode.get('type')),
|
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnail = player.get('image')
|
title = deliver_info['title']
|
||||||
|
thumbnail = jwconf.get('image')
|
||||||
title = clean_html((
|
duration = int_or_none(deliver_info.get('duration'))
|
||||||
self._html_search_regex(
|
timestamp = int_or_none(deliver_info.get('release_time'))
|
||||||
r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
|
uploader_id = deliver_info.get('owner_id')
|
||||||
webpage, 'title', default=None) or
|
|
||||||
self._search_regex(
|
|
||||||
r"var\s+nameVideo\s*=\s*'([^']+)'",
|
|
||||||
deliver_page, 'title')))
|
|
||||||
|
|
||||||
description = clean_html(self._html_search_regex(
|
|
||||||
r'(?s)<span>Description</span>(.+?)</p>', webpage,
|
|
||||||
'description', fatal=False))
|
|
||||||
|
|
||||||
upload_date = unified_strdate(self._search_regex(
|
|
||||||
r'Ajouté le\s*<span>([^<]+)', webpage,
|
|
||||||
'upload date', fatal=False))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue