mirror of https://github.com/yt-dlp/yt-dlp.git
[mtv] Add mtv.it and extract series metadata (#156)
* New extractors: MTVItalia, MTVItaliaProgramma
* Extract fields: series, season_number, episode_number

Authored-by: nixxo
This commit is contained in:
parent
994443d24d
commit
605b684c2d
|
@ -732,6 +732,8 @@ from .mtv import (
|
||||||
MTVServicesEmbeddedIE,
|
MTVServicesEmbeddedIE,
|
||||||
MTVDEIE,
|
MTVDEIE,
|
||||||
MTVJapanIE,
|
MTVJapanIE,
|
||||||
|
MTVItaliaIE,
|
||||||
|
MTVItaliaProgrammaIE,
|
||||||
)
|
)
|
||||||
from .muenchentv import MuenchenTVIE
|
from .muenchentv import MuenchenTVIE
|
||||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||||
|
|
|
@ -14,6 +14,7 @@ from ..utils import (
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
int_or_none,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
@ -176,6 +177,22 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
raise ExtractorError('Could not find video title')
|
raise ExtractorError('Could not find video title')
|
||||||
title = title.strip()
|
title = title.strip()
|
||||||
|
|
||||||
|
series = find_xpath_attr(
|
||||||
|
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||||
|
'scheme', 'urn:mtvn:franchise')
|
||||||
|
season = find_xpath_attr(
|
||||||
|
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||||
|
'scheme', 'urn:mtvn:seasonN')
|
||||||
|
episode = find_xpath_attr(
|
||||||
|
itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||||
|
'scheme', 'urn:mtvn:episodeN')
|
||||||
|
series = series.text if series is not None else None
|
||||||
|
season = season.text if season is not None else None
|
||||||
|
episode = episode.text if episode is not None else None
|
||||||
|
if season and episode:
|
||||||
|
# episode number includes season, so remove it
|
||||||
|
episode = re.sub(r'^%s' % season, '', episode)
|
||||||
|
|
||||||
# This is a short id that's used in the webpage urls
|
# This is a short id that's used in the webpage urls
|
||||||
mtvn_id = None
|
mtvn_id = None
|
||||||
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
|
||||||
|
@ -201,6 +218,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
'description': description,
|
'description': description,
|
||||||
'duration': float_or_none(content_el.attrib.get('duration')),
|
'duration': float_or_none(content_el.attrib.get('duration')),
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
|
'series': series,
|
||||||
|
'season_number': int_or_none(season),
|
||||||
|
'episode_number': int_or_none(episode),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_feed_query(self, uri):
|
def _get_feed_query(self, uri):
|
||||||
|
@ -483,3 +503,152 @@ class MTVDEIE(MTVServicesInfoExtractor):
|
||||||
'arcEp': 'mtv.de',
|
'arcEp': 'mtv.de',
|
||||||
'mgid': uri,
|
'mgid': uri,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class MTVItaliaIE(MTVServicesInfoExtractor):
    """Extractor for mtv.it pages under /episodi/, /video/ and /musica/."""

    IE_NAME = 'mtv.it'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
        'info_dict': {
            'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
            'ext': 'mp4',
            'title': 'Cavoli amario (episodio completo)',
            'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
            'series': 'Mario - Una Serie Di Maccio Capatonda',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }]
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'

    def _get_feed_query(self, uri):
        """Return the feed query parameters for *uri*.

        The result carries the fixed ``arcEp`` value for mtv.it and passes
        *uri* through unchanged as ``mgid``.
        """
        feed_query = dict(arcEp='mtv.it')
        feed_query['mgid'] = uri
        return feed_query
|
||||||
|
|
||||||
|
|
||||||
|
class MTVItaliaProgrammaIE(MTVItaliaIE):
    """Playlist extractor for mtv.it pages under /programmi/ and /playlist/.

    The page manifest is fetched from ``_FEED_URL``; its zones point to
    further JSON feeds from which the playlist metadata and the individual
    entry URLs are collected.
    """

    IE_NAME = 'mtv.it:programma'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        # program page: general
        'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
        'info_dict': {
            'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
            'title': 'Mario - Una Serie Di Maccio Capatonda',
            'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
        },
        'playlist_count': 2,
        'params': {
            'skip_download': True,
        },
    }, {
        # program page: specific season
        'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
        'info_dict': {
            'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
            'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
        },
        'playlist_count': 34,
        'params': {
            'skip_download': True,
        },
    }, {
        # playlist page + redirect
        'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
        'info_dict': {
            'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
            'title': 'Sexy Videos',
        },
        'playlist_mincount': 145,
        'params': {
            'skip_download': True,
        },
    }]
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'

    def _get_entries(self, title, url):
        """Yield a ``url_result`` for every item of a paginated JSON feed.

        Starting at *url*, each page is downloaded and its
        ``result.data.items`` (or, failing that, ``result.data.seasons``)
        list is walked; every dict entry with a ``canonicalURL`` is yielded.
        Pagination follows ``result.nextPageURL`` until it is absent.

        *title* is only passed to ``_download_json`` as the id shown while
        downloading.
        """
        visited = set()
        # Also stop when a page URL repeats, so a feed whose nextPageURL
        # points back to an already fetched page cannot loop forever.
        while url and url not in visited:
            visited.add(url)
            # The page number is the trailing path component; default to '1'
            # for URLs that do not end in a number.
            pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
            page = self._download_json(url, title, 'page %s' % pg)
            url = try_get(
                page, lambda x: x['result']['nextPageURL'], compat_str)
            entries = try_get(
                page, (
                    lambda x: x['result']['data']['items'],
                    lambda x: x['result']['data']['seasons']),
                list)
            for entry in entries or []:
                if not isinstance(entry, dict):
                    continue
                if entry.get('canonicalURL'):
                    yield self.url_result(entry['canonicalURL'])

    def _real_extract(self, url):
        """Resolve a programme/playlist page into a playlist result.

        Raises ExtractorError when the manifest, the info feed or the
        playlist feed cannot be located.
        """
        query = {'url': url}
        info_url = update_url_query(self._FEED_URL, query)
        video_id = self._match_id(url)
        info = try_get(
            self._download_json(info_url, video_id),
            lambda x: x['manifest'], dict)
        if not info:
            # Without a manifest there are neither zones nor a redirect to
            # follow; fail with a clear message instead of an AttributeError.
            raise ExtractorError('No manifest found')

        redirect = try_get(
            info, lambda x: x['newLocation']['url'], compat_str)
        if redirect:
            return self.url_result(redirect)

        title = info.get('title')
        video_id = try_get(
            info, lambda x: x['reporting']['itemId'], compat_str)
        parent_id = try_get(
            info, lambda x: x['reporting']['parentId'], compat_str)

        playlist_url = current_url = None
        zones = try_get(info, lambda x: x['zones'], dict) or {}
        for z in zones.values():
            if not isinstance(z, dict):
                continue
            module_name = z.get('moduleName')
            if module_name in ('INTL_M304', 'INTL_M209'):
                # Feed downloaded below as 'Show infos'; the last match wins.
                info_url = z.get('feed')
            if module_name in ('INTL_M308', 'INTL_M317'):
                # Feed used for the seasons lookup and the entries.
                playlist_url = playlist_url or z.get('feed')
            if module_name in ('INTL_M300',):
                # Feed used instead when the current season is listed.
                current_url = current_url or z.get('feed')

        # info_url still holds the manifest URL when no info zone matched, so
        # this only triggers when a matching zone carries an empty feed.
        if not info_url:
            raise ExtractorError('No info found')

        if video_id == parent_id:
            video_id = self._search_regex(
                r'([^\/]+)/[^\/]+$', info_url, 'video_id')

        info = self._download_json(info_url, video_id, 'Show infos')
        info = try_get(info, lambda x: x['result']['data'], dict)
        title = title or try_get(
            info, (
                lambda x: x['title'],
                lambda x: x['headline']),
            compat_str)
        description = try_get(info, lambda x: x['content'], compat_str)

        # The seasons lookup needs both feeds; skip it when either is missing
        # rather than requesting a None URL.
        if current_url and playlist_url:
            season = try_get(
                self._download_json(playlist_url, video_id, 'Seasons info'),
                lambda x: x['result']['data'], dict)
            current = try_get(
                season, lambda x: x['currentSeason'], compat_str)
            seasons = try_get(
                season, lambda x: x['seasons'], list) or []

            season_titles = [
                s.get('eTitle') for s in seasons if isinstance(s, dict)]
            if current in season_titles:
                playlist_url = current_url

        if not playlist_url:
            # Fail here with a clear message; otherwise the lazy entries
            # generator would fail later on a None URL.
            raise ExtractorError('No playlist found')

        if title:
            # Drop site/section suffixes such as "| MTV Italia".
            title = re.sub(
                r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
                '', title, flags=re.IGNORECASE).strip()

        return self.playlist_result(
            self._get_entries(title, playlist_url),
            video_id, title, description)
|
||||||
|
|
Loading…
Reference in New Issue