1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-12-08 22:46:39 +01:00
yt-dlp/yt_dlp/extractor/podbayfm.py

76 lines
2.6 KiB
Python
Raw Normal View History

from .common import InfoExtractor
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call
def result_from_props(props, episode_id=None):
    """Build an info dict for one episode from its props mapping.

    Args:
        props: Mapping describing the episode. ``mediaURL`` is indexed
            directly and must be present; ``podcast_id``, ``title``,
            ``image``, ``timestamp`` and ``duration`` are read if available.
        episode_id: Value used for ``id`` when ``props`` carries no truthy
            ``podcast_id``.

    Returns:
        A dict with the keys ``id``, ``title``, ``url``, ``ext``,
        ``thumbnail``, ``timestamp`` and ``duration``.

    Raises:
        KeyError: If ``props`` is a dict without a ``mediaURL`` entry.
    """
    return {
        'id': props.get('podcast_id') or episode_id,
        'title': props.get('title'),
        # Indexed (not .get) on purpose: without a media URL there is
        # nothing to download, so a missing key is allowed to raise.
        'url': props['mediaURL'],
        'ext': 'mp3',
        # The `image` value is run through jwt_decode_hs256 and the `url`
        # entry of the decoded result is used.
        # NOTE(review): try_call presumably yields None when the lookup or
        # decoding fails - confirm against ..utils.
        'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']),
        # Coerced with the same helper as `duration` so both numeric fields
        # are treated consistently instead of forwarding the raw value.
        'timestamp': int_or_none(props.get('timestamp')),
        'duration': int_or_none(props.get('duration')),
    }
class PodbayFMIE(InfoExtractor):
    """Extractor for a single podbay.fm episode page (``/p/<slug>/e/<id>``)."""

    # Both path segments exclude `?` and `#`. With the former `[^/]*` the
    # greedy id group swallowed a trailing query string or fragment (e.g.
    # `.../e/1647338400?utm=x` yielded the id `1647338400?utm=x`) because the
    # optional `(?:[?#].*)?` tail could simply match nothing. `+` additionally
    # rejects empty segments.
    _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>[^/?#]+)/?(?:[?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
        'md5': '98b41285dcf7989d105a4ed0404054cf',
        'info_dict': {
            'id': '1647338400',
            'title': 'Part One: Kissinger',
            'ext': 'mp3',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1647338400,
            'duration': 5001,
            'upload_date': '20220315',
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and turn its embedded data into an info dict.

        The id captured by ``_VALID_URL`` is used both as the display id for
        the download and as the fallback ``id`` handed to
        ``result_from_props``.
        """
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)
        # The episode description lives under props -> pageProps -> episode
        # of the data returned by _search_nextjs_data.
        data = self._search_nextjs_data(webpage, episode_id)
        return result_from_props(data['props']['pageProps']['episode'], episode_id)
class PodbayFMChannelIE(InfoExtractor):
    """Extractor for a podbay.fm podcast page (``/p/<slug>``), returned as a playlist."""

    # The slug group excludes `?` and `#`. With the former `[^/]*` a URL such
    # as `/p/some-show?ref=x` captured `some-show?ref=x` as the id, which was
    # then interpolated verbatim into the API URL in _fetch_page and used as
    # the playlist id. `+` additionally rejects an empty slug.
    _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:[?#].*)?$'
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards',
        'info_dict': {
            'id': 'behind-the-bastards',
            'title': 'Behind the Bastards',
        },
    }]
    # Page size handed to OnDemandPagedList.
    # NOTE(review): presumably matches the number of episodes the API returns
    # per page - confirm against the service.
    _PAGE_SIZE = 10

    def _fetch_page(self, channel_id, pagenum):
        """Download one page of the podcast listing and return its ``podcast`` object."""
        return self._download_json(
            f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
            channel_id)['podcast']

    @staticmethod
    def _results_from_page(channel_id, page):
        """Convert the ``episodes`` of one listing page into a list of entry dicts.

        Each entry is the ``result_from_props`` dict for the episode, tagged
        with the episode extractor's name/key and with the URL of the
        episode's own page.
        """
        return [{
            **result_from_props(e),
            'extractor': PodbayFMIE.IE_NAME,
            'extractor_key': PodbayFMIE.ie_key(),
            # somehow they use timestamps as the episode identifier
            'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}',
        } for e in page['episodes']]

    def _real_extract(self, url):
        """Return a playlist of all episodes of the podcast addressed by ``url``."""
        channel_id = self._match_id(url)
        # Page 0 is fetched up front because its payload also supplies the
        # playlist title; the page callback below reuses it for pagenum 0
        # instead of downloading it a second time.
        first_page = self._fetch_page(channel_id, 0)
        entries = OnDemandPagedList(
            lambda pagenum: self._results_from_page(
                channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
            self._PAGE_SIZE)
        return self.playlist_result(entries, channel_id, first_page.get('title'))