1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-06 00:49:20 +02:00

Remove comment extraction code for now

Can be added back in a separate PR
This commit is contained in:
sepro
2024-11-11 21:14:56 +01:00
parent 727c762311
commit 19508dd192
2 changed files with 0 additions and 45 deletions

View File

@@ -1864,11 +1864,6 @@ The following extractors use this feature:
#### digitalconcerthall #### digitalconcerthall
* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats * `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats
#### mixchmovie
* `max_comments`: Maximum number of comments to extract - default is `120`. The final number of comments may be lower than this limit because gifts are filtered out
* `fetch_interval_sec`: Interval in seconds between fetching comment JSON files. If `max_comments` is set to a large value, the fetch interval should be set to limit the request rate
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE --> <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

View File

@@ -3,7 +3,6 @@ from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
UserNotLive, UserNotLive,
bool_or_none,
int_or_none, int_or_none,
str_or_none, str_or_none,
url_or_none, url_or_none,
@@ -169,43 +168,4 @@ class MixchMovieIE(InfoExtractor):
'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}), 'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}),
}), }),
'live_status': 'not_live', 'live_status': 'not_live',
'__post_extractor': self.extract_comments(video_id),
} }
def _get_comments(self, video_id):
    """Yield comment dicts for a movie by walking the paginated comments API.

    Pages are requested from the movie's `comments` endpoint until the API
    stops reporting a next page or the comment budget is used up. The budget
    comes from the `max_comments` extractor argument (120 when unset/invalid);
    an optional delay between requests comes from `fetch_interval_sec`.
    """
    # Comments are organized in a json chain, connected with 'nextCursor' property.
    # There are up to 20 comments in one json file.
    COMMENTS_LIMIT = 20
    # If json files are downloaded too frequently, the server might ban all the access from your IP.
    comments_left = int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120
    fetch_interval_sec = int_or_none(self._configuration_arg('fetch_interval_sec', [''])[0])

    base_url = f'https://mixch.tv/api-web/movies/{video_id}/comments'
    has_next = True
    next_cursor = ''
    fragment = 1

    while has_next and (comments_left > 0):
        data = self._download_json(
            base_url, video_id,
            note=f'Downloading comments, fragment {fragment}', errnote='Failed to download comments',
            query={'cursor': next_cursor, 'limit': COMMENTS_LIMIT})
        fragment += 1
        # The budget counts requested entries, not yielded ones (gifts are dropped below).
        comments_left -= COMMENTS_LIMIT

        # Some of the 'comments' are not real comments but gifts.
        # Only real comments are extracted here.
        yield from traverse_obj(data, ('comments', lambda _, v: v['comment'], {
            'author': ('user_name', {str}),
            'author_id': ('user_id', {int_or_none}),
            'author_thumbnail': ('profile_image_url', {url_or_none}),
            'id': ('id', {int_or_none}),
            'text': ('comment', {str_or_none}),
            'timestamp': ('created', {int_or_none}),
        }))

        # The key and its type filter must live in ONE path tuple. Written as
        # separate positional arguments they are alternative paths, so the
        # filter would never be applied to the extracted value.
        has_next = traverse_obj(data, ('hasNext', {bool_or_none}))
        next_cursor = traverse_obj(data, ('nextCursor', {str_or_none}))

        # Only wait when another request is actually going to be made.
        if fetch_interval_sec and has_next and comments_left > 0:
            self._sleep(fetch_interval_sec, video_id)