From 6efb0711357dfe1ecde30fad68b7724a526f2190 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 22 Apr 2021 04:13:04 +0530 Subject: [PATCH] [BilibiliChannel] Fix pagination Closes #222 https://github.com/nao20010128nao/ytdl-patched/commit/ccca21d7f57f1330a205f07d369e31883ba006c2 Coauthored by: nao20010128nao, pukkandan --- yt_dlp/extractor/bilibili.py | 51 ++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0029a7e49b..b7b60b77eb 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import hashlib +import itertools import json import re @@ -498,28 +499,40 @@ class BiliBiliBangumiIE(InfoExtractor): class BilibiliChannelIE(InfoExtractor): _VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)' - # May need to add support for pagination? Need to find a user with many video uploads to test - _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=1&ps=25&jsonp=jsonp" - _TEST = {} # TODO: Add tests + _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp" + _TESTS = [{ + 'url': 'https://space.bilibili.com/3985676/video', + 'info_dict': {}, + 'playlist_mincount': 112, + }] + + def _entries(self, list_id): + count, max_count = 0, None + + for page_num in itertools.count(1): + data = self._parse_json( + self._download_webpage( + self._API_URL % (list_id, page_num), list_id, + note='Downloading page %d' % page_num), + list_id)['data'] + + max_count = max_count or try_get(data, lambda x: x['page']['count']) + + entries = try_get(data, lambda x: x['list']['vlist']) + if not entries: + return + for entry in entries: + yield self.url_result( + 'https://www.bilibili.com/video/%s' % entry['bvid'], + BiliBiliIE.ie_key(), entry['bvid']) + + count += len(entries) + if max_count and count >= max_count: + return def _real_extract(self, url): list_id = 
self._match_id(url) - json_str = self._download_webpage(self._API_URL % list_id, "None") - - json_parsed = json.loads(json_str) - entries = [{ - '_type': 'url', - 'ie_key': BiliBiliIE.ie_key(), - 'url': ('https://www.bilibili.com/video/%s' % - entry['bvid']), - 'id': entry['bvid'], - } for entry in json_parsed['data']['list']['vlist']] - - return { - '_type': 'playlist', - 'id': list_id, - 'entries': entries - } + return self.playlist_result(self._entries(list_id), list_id) class BiliBiliSearchIE(SearchInfoExtractor):