mirror of https://github.com/yt-dlp/yt-dlp.git
[extractor/youtube] Fix continuation loop with no comments (#7148)
Deep check the response for incomplete data. Authored by: coletdjnz
This commit is contained in:
parent
c2502cfed9
commit
18f8fba7c8
|
@ -3314,7 +3314,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
expected_comment_count = self._get_count(
|
||||
comments_header_renderer, 'countText', 'commentsCount')
|
||||
|
||||
if expected_comment_count:
|
||||
if expected_comment_count is not None:
|
||||
tracker['est_total'] = expected_comment_count
|
||||
self.to_screen(f'Downloading ~{expected_comment_count} comments')
|
||||
comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
|
||||
|
@ -3385,7 +3385,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
if not tracker:
|
||||
tracker = dict(
|
||||
running_total=0,
|
||||
est_total=0,
|
||||
est_total=None,
|
||||
current_page_thread=0,
|
||||
total_parent_comments=0,
|
||||
total_reply_comments=0,
|
||||
|
@ -3418,11 +3418,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
|
||||
is_forced_continuation = True
|
||||
|
||||
continuation_items_path = (
|
||||
'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
|
||||
for page_num in itertools.count(0):
|
||||
if not continuation:
|
||||
break
|
||||
headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
|
||||
comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
|
||||
comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
|
||||
if page_num == 0:
|
||||
if is_first_continuation:
|
||||
note_prefix = 'Downloading comment section API JSON'
|
||||
|
@ -3433,11 +3435,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
|
||||
' ' if parent else '', ' replies' if parent else '',
|
||||
page_num, comment_prog_str)
|
||||
|
||||
# Do a deep check for incomplete data, as YouTube may sometimes return no comments for a continuation.
|
||||
# Ignore the check if YouTube says the comment count is 0.
|
||||
check_get_keys = None
|
||||
if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
|
||||
check_get_keys = [[*continuation_items_path, ..., (
|
||||
'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
|
||||
try:
|
||||
response = self._extract_response(
|
||||
item_id=None, query=continuation,
|
||||
ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
|
||||
check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
|
||||
check_get_keys=check_get_keys)
|
||||
except ExtractorError as e:
|
||||
# Ignore incomplete data error for replies if retries didn't work.
|
||||
# This is to allow any other parent comments and comment threads to be downloaded.
|
||||
|
@ -3449,15 +3458,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
else:
|
||||
raise
|
||||
is_forced_continuation = False
|
||||
continuation_contents = traverse_obj(
|
||||
response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
|
||||
|
||||
continuation = None
|
||||
for continuation_section in continuation_contents:
|
||||
continuation_items = traverse_obj(
|
||||
continuation_section,
|
||||
(('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
|
||||
get_all=False, expected_type=list) or []
|
||||
for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
|
||||
if is_first_continuation:
|
||||
continuation = extract_header(continuation_items)
|
||||
is_first_continuation = False
|
||||
|
|
Loading…
Reference in New Issue