[ie/pr0gramm] Enable POL filter and provide tags without login (#9051)

Authored by: Grub4K
This commit is contained in:
Simon Sawicki 2024-01-23 23:20:13 +01:00 committed by GitHub
parent a40b0070c2
commit 5f25f348f9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 27 additions and 14 deletions

View File

@@ -18,7 +18,6 @@ from ..utils.traversal import traverse_obj
class Pr0grammIE(InfoExtractor): class Pr0grammIE(InfoExtractor):
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)' _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
_TESTS = [{ _TESTS = [{
# Tags require account
'url': 'https://pr0gramm.com/new/video/5466437', 'url': 'https://pr0gramm.com/new/video/5466437',
'info_dict': { 'info_dict': {
'id': '5466437', 'id': '5466437',
@@ -36,7 +35,6 @@ class Pr0grammIE(InfoExtractor):
'_old_archive_ids': ['pr0grammstatic 5466437'], '_old_archive_ids': ['pr0grammstatic 5466437'],
}, },
}, { }, {
# Tags require account
'url': 'https://pr0gramm.com/new/3052805:comment28391322', 'url': 'https://pr0gramm.com/new/3052805:comment28391322',
'info_dict': { 'info_dict': {
'id': '3052805', 'id': '3052805',
@@ -71,6 +69,23 @@ class Pr0grammIE(InfoExtractor):
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
'_old_archive_ids': ['pr0grammstatic 5848332'], '_old_archive_ids': ['pr0grammstatic 5848332'],
}, },
}, {
'url': 'https://pr0gramm.com/top/5895149',
'info_dict': {
'id': '5895149',
'ext': 'mp4',
'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
'tags': 'count:19',
'uploader': 'algoholigSeeManThrower',
'uploader_id': 457556,
'upload_timestamp': 1697580902,
'upload_date': '20231018',
'like_count': int,
'dislike_count': int,
'age_limit': 0,
'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
'_old_archive_ids': ['pr0grammstatic 5895149'],
},
}, { }, {
'url': 'https://pr0gramm.com/static/5466437', 'url': 'https://pr0gramm.com/static/5466437',
'only_matching': True, 'only_matching': True,
@@ -92,15 +107,15 @@ class Pr0grammIE(InfoExtractor):
def _maximum_flags(self):
    """Return the widest content-flag bitmask that can be requested from the API.

    We need to guess the flags for the content, otherwise the API will raise
    an error. The maximum allowed flags for the account are guessed from the
    cookies.

    Bitflags are (most significant bit first): pol, nsfp, nsfl, nsfw, sfw.

    Returns:
        int: ``sfw | pol`` when not logged in; additionally ``nsfp`` when
        logged in; additionally ``nsfl | nsfw`` when the ``me`` cookie
        reports the account as ``verified``.
    """
    flags = 0b10001  # pol | sfw
    if not self._is_logged_in:
        return flags

    flags |= 0b01000  # nsfp
    cookies = self._get_cookies(self.BASE_URL)
    if 'me' not in cookies:
        self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
        # Re-read the cookies after the refresh request. The object fetched
        # above was obtained before the request, so a 'me' cookie set by the
        # response would otherwise not be seen by the lookup below.
        # NOTE(review): assumes `_get_cookies` returns a snapshot rather than
        # a live view of the cookie jar; re-reading is harmless either way.
        cookies = self._get_cookies(self.BASE_URL)

    # The 'me' cookie value is URL-quoted JSON; 'verified' is read from it.
    if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
        flags |= 0b00110  # nsfl | nsfw
    return flags
@@ -134,14 +149,12 @@ class Pr0grammIE(InfoExtractor):
if not source or not source.endswith('mp4'): if not source or not source.endswith('mp4'):
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id) self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
tags = None metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
if self._is_logged_in: tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags') # Sorted by "confidence", higher confidence = earlier in list
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str})) confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
# Sorted by "confidence", higher confidence = earlier in list if confidences:
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float}))) tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
if confidences:
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
formats = traverse_obj(video_info, ('variants', ..., { formats = traverse_obj(video_info, ('variants', ..., {
'format_id': ('name', {str}), 'format_id': ('name', {str}),