mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-10 12:40:05 +01:00
[ZenPorn] - adjustments based on PR review
This commit is contained in:
parent
5c12c01061
commit
34cd34c599
@ -1,10 +1,13 @@
|
|||||||
|
import base64
|
||||||
|
import binascii
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_strdate
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -71,81 +74,55 @@ class ZenPornIE(InfoExtractor):
|
|||||||
|
|
||||||
def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
|
def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
|
||||||
""" This function is a reverse engineering from the website javascript """
|
""" This function is a reverse engineering from the website javascript """
|
||||||
extr_id = int_or_none(extr_id)
|
result = '/'.join(str(int_or_none(extr_id) // i * i) for i in (1_000_000, 1_000, 1))
|
||||||
if extr_id is None:
|
|
||||||
raise ExtractorError('Unable to generate the `gen_info_url`')
|
|
||||||
result = '/'.join(str(extr_id // i * i) for i in (1_000_000, 1_000, 1))
|
|
||||||
return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'
|
return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'
|
||||||
|
|
||||||
def _decode_video_url(self, ext_domain, encoded_url):
|
@staticmethod
|
||||||
|
def _decode_video_url(encoded_url):
|
||||||
""" This function is a reverse engineering from the website javascript """
|
""" This function is a reverse engineering from the website javascript """
|
||||||
cust_char_set = 'АВСDЕFGHIJKLМNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,~'
|
# Replace lookalike characters and standardize map
|
||||||
decoded_url = ''
|
translation = str.maketrans('АВСЕМ.,~', 'ABCEM+/=')
|
||||||
cur_pos = 0
|
try:
|
||||||
|
return base64.b64decode(encoded_url.translate(translation), validate=True).decode()
|
||||||
# Check for characters not in the custom character set and handle errors
|
except (binascii.Error, ValueError):
|
||||||
if any(char not in cust_char_set for char in encoded_url):
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Filter out characters not in the custom character set
|
|
||||||
encoded_url = ''.join(char for char in encoded_url if char in cust_char_set)
|
|
||||||
|
|
||||||
while cur_pos < len(encoded_url):
|
|
||||||
o = cust_char_set.index(encoded_url[cur_pos])
|
|
||||||
i = cust_char_set.index(encoded_url[cur_pos + 1])
|
|
||||||
s = cust_char_set.index(encoded_url[cur_pos + 2])
|
|
||||||
a = cust_char_set.index(encoded_url[cur_pos + 3])
|
|
||||||
|
|
||||||
o = (o << 2) | (i >> 4)
|
|
||||||
i = ((i & 15) << 4) | (s >> 2)
|
|
||||||
l = ((s & 3) << 6) | a
|
|
||||||
|
|
||||||
decoded_url += chr(o)
|
|
||||||
if s != 64:
|
|
||||||
decoded_url += chr(i)
|
|
||||||
if a != 64:
|
|
||||||
decoded_url += chr(l)
|
|
||||||
|
|
||||||
cur_pos += 4
|
|
||||||
|
|
||||||
return f'https://{ext_domain}{decoded_url}'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
ext_domain, extr_id = self._search_regex(
|
ext_domain, extr_id = self._search_regex(
|
||||||
r'https:\/\/(?P<ext_domain>[\w.-]+\.\w{3})\/embed\/(?P<extr_id>\d+)\/',
|
r'https://(?P<ext_domain>[\w.-]+\.\w{3})/embed/(?P<extr_id>\d+)/',
|
||||||
webpage, 'embed_info', group=('ext_domain', 'extr_id'))
|
webpage, 'embed info', group=('ext_domain', 'extr_id'), fatal=True)
|
||||||
|
|
||||||
info_json = self._download_json(self._gen_info_url(ext_domain, extr_id),
|
info_json = self._download_json(
|
||||||
video_id, note="Downloading JSON metadata for the video info.")
|
self._gen_info_url(ext_domain, extr_id), video_id,
|
||||||
if not info_json:
|
note='Downloading video info JSON', fatal=True)
|
||||||
raise ExtractorError('Unable to retrieve the video info.')
|
|
||||||
|
|
||||||
video_json = self._download_json(f'https://{ext_domain}/api/videofile.php?video_id={extr_id}&lifetime=8640000',
|
video_json = self._download_json(
|
||||||
video_id, note="Downloading JSON metadata for the video location.")
|
f'https://{ext_domain}/api/videofile.php', video_id, query={
|
||||||
if not video_json:
|
'video_id': extr_id,
|
||||||
raise ExtractorError('Unable to retrieve the the video location.')
|
'lifetime': 8640000,
|
||||||
|
}, note='Downloading video metadata JSON', fatal=True)
|
||||||
|
|
||||||
encoded_url = video_json[0].get('video_url')
|
encoded_url = video_json[0]['video_url']
|
||||||
if not encoded_url:
|
|
||||||
raise ExtractorError('Unable to retrieve the `encoded_url` value.')
|
|
||||||
|
|
||||||
download_url = self._decode_video_url(ext_domain, encoded_url)
|
decoded_url = self._decode_video_url(encoded_url)
|
||||||
if not download_url:
|
if not decoded_url:
|
||||||
raise ExtractorError('Unable to retrieve the ``download_url``.')
|
raise ExtractorError('Unable to decode the video url')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'extr_id': extr_id,
|
'extr_id': extr_id,
|
||||||
'ext': determine_ext(video_json[0].get('format')),
|
'ext': traverse_obj(
|
||||||
'title': traverse_obj(info_json, ('video', 'title')),
|
video_json, (..., 'format', {determine_ext}), get_all=False),
|
||||||
'description': traverse_obj(info_json, ('video', 'description')),
|
'url': f'https://{ext_domain}{decoded_url}',
|
||||||
'thumbnail': traverse_obj(info_json, ('video', 'thumb')),
|
'age_limit': 18,
|
||||||
'post_date': unified_strdate(traverse_obj(info_json, ('video', 'post_date'))),
|
**traverse_obj(info_json, ('video', {
|
||||||
'uploader': traverse_obj(info_json, ('video', 'user', 'username')),
|
'title': ('title', {str}),
|
||||||
'url': download_url,
|
'description': ('description', {str}),
|
||||||
'age_limit': 18
|
'thumbnail': ('thumb', {url_or_none}),
|
||||||
|
'post_date': ('post_date', {unified_strdate}),
|
||||||
|
'uploader': ('user', 'username', {str}),
|
||||||
|
})),
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user