From a318f59d14792d25b2206c3f50181e03e8716db7 Mon Sep 17 00:00:00 2001 From: rigstot <52549972+rigstot@users.noreply.github.com> Date: Thu, 29 Jul 2021 06:03:01 +0200 Subject: [PATCH] [generic] Support KVS player (#549) * Replaces the extractor for thisvid Fixes: https://github.com/ytdl-org/youtube-dl/issues/2077 Authored-by: rigstot --- yt_dlp/extractor/extractors.py | 1 - yt_dlp/extractor/generic.py | 165 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/thisvid.py | 97 ------------------- 3 files changed, 165 insertions(+), 98 deletions(-) delete mode 100644 yt_dlp/extractor/thisvid.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 577261ca11..991c360148 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1338,7 +1338,6 @@ from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE -from .thisvid import ThisVidIE from .threeqsdn import ThreeQSDNIE from .tiktok import TikTokIE from .tinypic import TinyPicIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 7e0598e58e..d08f8f30de 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2238,6 +2238,87 @@ class GenericIE(InfoExtractor): 'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV', }, 'playlist_count': 1, + }, { + # KVS Player + 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/', + 'info_dict': { + 'id': '105', + 'display_id': 'kelis-4th-of-july', + 'ext': 'mp4', + 'title': 'Kelis - 4th Of July', + 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, { + # KVS Player + 'url': 'https://www.kvs-demo.com/embed/105/', + 'info_dict': { + 'id': '105', + 'display_id': 'kelis-4th-of-july', + 'ext': 'mp4', + 'title': 'Kelis - 4th Of July / Embed Player', + 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, { + # KVS Player + 'url': 'https://thisvid.com/videos/french-boy-pantsed/', + 'md5': '3397979512c682f6b85b3b04989df224', + 'info_dict': { + 'id': '2400174', + 'display_id': 'french-boy-pantsed', + 'ext': 'mp4', + 'title': 'French Boy Pantsed - ThisVid.com', + 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', + } + }, { + # KVS Player + 'url': 'https://thisvid.com/embed/2400174/', + 'md5': '3397979512c682f6b85b3b04989df224', + 'info_dict': { + 'id': '2400174', + 'display_id': 'french-boy-pantsed', + 'ext': 'mp4', + 'title': 'French Boy Pantsed - ThisVid.com', + 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', + } + }, { + # KVS Player + 'url': 'https://youix.com/video/leningrad-zoj/', + 'md5': '94f96ba95706dc3880812b27b7d8a2b8', + 'info_dict': { + 'id': '18485', + 'display_id': 'leningrad-zoj', + 'ext': 'mp4', + 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', + 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', + } + }, { + # KVS Player + 'url': 'https://youix.com/embed/18485', + 'md5': '94f96ba95706dc3880812b27b7d8a2b8', + 'info_dict': { + 'id': '18485', + 'display_id': 'leningrad-zoj', + 'ext': 'mp4', + 'title': 'Ленинград - ЗОЖ', + 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', + } + }, { + # KVS Player + 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', + 'md5': '94166bdb26b4cb1fb9214319a629fc51', + 'info_dict': { + 'id': '21217', + 'display_id': '40-nochey-40-nights-2016', + 'ext': 'mp4', + 'title': '40 ночей (2016) - BogMedia.org', + 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', + } }, ] @@ -2343,6 +2424,44 @@ class GenericIE(InfoExtractor): 'title': title, } + def _kvs_getrealurl(self, video_url, license_code): + if not video_url.startswith('function/0/'): + return video_url # not obfuscated + + url_path, _, url_query = video_url.partition('?') + urlparts = url_path.split('/')[2:] + license = self._kvs_getlicensetoken(license_code) + newmagic = urlparts[5][:32] + + for o in range(len(newmagic) - 1, -1, -1): + new = '' + l = (o + sum([int(n) for n in license[o:]])) % 32 + + for i in range(0, len(newmagic)): + if i == o: + new += newmagic[l] + elif i == l: + new += newmagic[o] + else: + new += newmagic[i] + newmagic = new + + urlparts[5] = newmagic + urlparts[5][32:] + return '/'.join(urlparts) + '?' + url_query + + def _kvs_getlicensetoken(self, license): + modlicense = license.replace('$', '').replace('0', '1') + center = int(len(modlicense) / 2) + fronthalf = int(modlicense[:center + 1]) + backhalf = int(modlicense[center:]) + + modlicense = str(4 * abs(fronthalf - backhalf)) + retval = '' + for o in range(0, center + 1): + for i in range(1, 5): + retval += str((int(license[o + i]) + int(modlicense[o])) % 10) + return retval + def _real_extract(self, url): if url.startswith('//'): return self.url_result(self.http_scheme() + url) @@ -3478,6 +3597,52 @@ class GenericIE(InfoExtractor): ) .*? ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) + if not found: + # Look for generic KVS player + found = re.search(r'', webpage) + flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json) + + # extract the part after the last / as the display_id from the + # canonical URL. + display_id = self._search_regex( + r'(?:' + r'|)', + webpage, 'display_id', fatal=False + ) + title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)', webpage, 'title') + + thumbnail = flashvars['preview_url'] + if thumbnail.startswith('//'): + protocol, _, _ = url.partition('/') + thumbnail = protocol + thumbnail + + formats = [] + for key in ('video_url', 'video_alt_url', 'video_alt_url2'): + if key in flashvars and '/get_file/' in flashvars[key]: + next_format = { + 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']), + 'format_id': flashvars.get(key + '_text', key), + 'ext': 'mp4', + } + height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key]) + if height: + next_format['height'] = int(height.group(1)) + else: + next_format['quality'] = 1 + formats.append(next_format) + self._sort_formats(formats) + + return { + 'id': flashvars['video_id'], + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } if not found: # Broaden the search a little bit found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py deleted file mode 100644 index f507e1b067..0000000000 --- a/yt_dlp/extractor/thisvid.py +++ /dev/null @@ -1,97 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals -import re - -from .common import InfoExtractor - - -class ThisVidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?Pvideos|embed)/(?P[A-Za-z0-9-]+/?)' - _TESTS = [{ - 'url': 'https://thisvid.com/videos/french-boy-pantsed/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'ext': 'mp4', - 'title': 'French Boy Pantsed', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - 'age_limit': 18, - } - }, { - 'url': 'https://thisvid.com/embed/2400174/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'ext': 'mp4', - 'title': 'French Boy Pantsed', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - 'age_limit': 18, - } - }] - - def _real_extract(self, url): - main_id = self._match_id(url) - webpage = self._download_webpage(url, main_id) - - # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future. - kvs_version = self._html_search_regex(r'