[generic] Refactor `_extract_rss`

Closes #3738
This commit is contained in:
pukkandan 2022-05-18 04:14:13 +05:30
parent 7896214c42
commit d6bf1161db
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
1 changed file with 14 additions and 35 deletions

View File

@ -129,6 +129,7 @@ from ..utils import (
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
try_call,
unescapeHTML, unescapeHTML,
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
@ -2536,66 +2537,44 @@ class GenericIE(InfoExtractor):
self._downloader.write_debug(f'Identified a {name}') self._downloader.write_debug(f'Identified a {name}')
def _extract_rss(self, url, video_id, doc):
    """Build a playlist result from a parsed RSS document.

    Each ``./channel/item`` element that yields a URL becomes one
    ``url_transparent`` entry; items with no usable URL are skipped.

    :param url: the feed URL; used verbatim as the playlist ``id``.
    :param video_id: not used in this method; kept for the call signature.
    :param doc: parsed feed element supporting ``find``/``findall``.
    :return: a ``playlist`` info dict with ``id``, ``title``,
        ``description`` and ``entries``.
    """
    NS_MAP = {
        'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
    }
    # Lower-cased <itunes:explicit> text -> age limit; any other value -> None.
    EXPLICIT_AGE_LIMIT = {'true': 18, 'yes': 18, 'false': 0, 'no': 0}

    entries = []
    for it in doc.findall('./channel/item'):
        # Use the first enclosure that actually carries a non-empty `url`.
        # A plain `next(generator, default)` would stop at the first
        # <enclosure> even when its `url` is missing, so neither the later
        # enclosures nor the <link> fallback would ever be considered.
        enclosure_urls = (e.attrib.get('url') for e in it.findall('./enclosure'))
        next_url = (
            next(filter(None, enclosure_urls), None)
            or xpath_text(it, 'link', fatal=False))
        if not next_url:
            continue

        # NOTE(review): try_call presumably swallows the AttributeError raised
        # when <guid> is absent and returns None - confirm against utils.
        guid = try_call(lambda: it.find('guid').text)
        if guid:
            next_url = smuggle_url(next_url, {'force_videoid': guid})

        def itunes(key):
            # Text of the item's <itunes:KEY> child, or None when absent.
            return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)

        entries.append({
            '_type': 'url_transparent',
            'url': next_url,
            'title': try_call(lambda: it.find('title').text),
            'description': xpath_text(it, 'description', default=None),
            'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
            'duration': parse_duration(itunes('duration')),
            'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
            'episode': itunes('title'),
            'episode_number': int_or_none(itunes('episode')),
            'season_number': int_or_none(itunes('season')),
            'age_limit': EXPLICIT_AGE_LIMIT.get((itunes('explicit') or '').lower()),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': try_call(lambda: doc.find('./channel/title').text),
        'description': try_call(lambda: doc.find('./channel/description').text),
        'entries': entries,
    }