From a318f59d14792d25b2206c3f50181e03e8716db7 Mon Sep 17 00:00:00 2001
From: rigstot <52549972+rigstot@users.noreply.github.com>
Date: Thu, 29 Jul 2021 06:03:01 +0200
Subject: [PATCH] [generic] Support KVS player (#549)
* Replaces the extractor for thisvid
Fixes: https://github.com/ytdl-org/youtube-dl/issues/2077
Authored-by: rigstot
---
yt_dlp/extractor/extractors.py | 1 -
yt_dlp/extractor/generic.py | 165 +++++++++++++++++++++++++++++++++
yt_dlp/extractor/thisvid.py | 97 -------------------
3 files changed, 165 insertions(+), 98 deletions(-)
delete mode 100644 yt_dlp/extractor/thisvid.py
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 577261ca11..991c360148 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1338,7 +1338,6 @@ from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
-from .thisvid import ThisVidIE
from .threeqsdn import ThreeQSDNIE
from .tiktok import TikTokIE
from .tinypic import TinyPicIE
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 7e0598e58e..d08f8f30de 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2238,6 +2238,87 @@ class GenericIE(InfoExtractor):
'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
},
'playlist_count': 1,
+ }, {
+ # KVS Player
+ 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
+ 'info_dict': {
+ 'id': '105',
+ 'display_id': 'kelis-4th-of-july',
+ 'ext': 'mp4',
+ 'title': 'Kelis - 4th Of July',
+ 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # KVS Player
+ 'url': 'https://www.kvs-demo.com/embed/105/',
+ 'info_dict': {
+ 'id': '105',
+ 'display_id': 'kelis-4th-of-july',
+ 'ext': 'mp4',
+ 'title': 'Kelis - 4th Of July / Embed Player',
+ 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # KVS Player
+ 'url': 'https://thisvid.com/videos/french-boy-pantsed/',
+ 'md5': '3397979512c682f6b85b3b04989df224',
+ 'info_dict': {
+ 'id': '2400174',
+ 'display_id': 'french-boy-pantsed',
+ 'ext': 'mp4',
+ 'title': 'French Boy Pantsed - ThisVid.com',
+ 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://thisvid.com/embed/2400174/',
+ 'md5': '3397979512c682f6b85b3b04989df224',
+ 'info_dict': {
+ 'id': '2400174',
+ 'display_id': 'french-boy-pantsed',
+ 'ext': 'mp4',
+ 'title': 'French Boy Pantsed - ThisVid.com',
+ 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://youix.com/video/leningrad-zoj/',
+ 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+ 'info_dict': {
+ 'id': '18485',
+ 'display_id': 'leningrad-zoj',
+ 'ext': 'mp4',
+ 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
+ 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://youix.com/embed/18485',
+ 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
+ 'info_dict': {
+ 'id': '18485',
+ 'display_id': 'leningrad-zoj',
+ 'ext': 'mp4',
+ 'title': 'Ленинград - ЗОЖ',
+ 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
+ }
+ }, {
+ # KVS Player
+ 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
+ 'md5': '94166bdb26b4cb1fb9214319a629fc51',
+ 'info_dict': {
+ 'id': '21217',
+ 'display_id': '40-nochey-40-nights-2016',
+ 'ext': 'mp4',
+ 'title': '40 ночей (2016) - BogMedia.org',
+ 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
+ }
},
]
@@ -2343,6 +2424,44 @@ class GenericIE(InfoExtractor):
'title': title,
}
+ def _kvs_getrealurl(self, video_url, license_code):
+ if not video_url.startswith('function/0/'):
+ return video_url # not obfuscated
+
+ url_path, _, url_query = video_url.partition('?')
+ urlparts = url_path.split('/')[2:]
+ license = self._kvs_getlicensetoken(license_code)
+ newmagic = urlparts[5][:32]
+
+ for o in range(len(newmagic) - 1, -1, -1):
+ new = ''
+ l = (o + sum([int(n) for n in license[o:]])) % 32
+
+ for i in range(0, len(newmagic)):
+ if i == o:
+ new += newmagic[l]
+ elif i == l:
+ new += newmagic[o]
+ else:
+ new += newmagic[i]
+ newmagic = new
+
+ urlparts[5] = newmagic + urlparts[5][32:]
+ return '/'.join(urlparts) + '?' + url_query
+
+ def _kvs_getlicensetoken(self, license):
+ modlicense = license.replace('$', '').replace('0', '1')
+ center = int(len(modlicense) / 2)
+ fronthalf = int(modlicense[:center + 1])
+ backhalf = int(modlicense[center:])
+
+ modlicense = str(4 * abs(fronthalf - backhalf))
+ retval = ''
+ for o in range(0, center + 1):
+ for i in range(1, 5):
+ retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
+ return retval
+
def _real_extract(self, url):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
@@ -3478,6 +3597,52 @@ class GenericIE(InfoExtractor):
)
.*?
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
+ if not found:
+ # Look for generic KVS player
+ found = re.search(r'', webpage)
+ flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
+
+ # extract the part after the last / as the display_id from the
+ # canonical URL.
+ display_id = self._search_regex(
+ r'(?:'
+ r'|)',
+ webpage, 'display_id', fatal=False
+ )
+ title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)(?:h1|title)>', webpage, 'title')
+
+ thumbnail = flashvars['preview_url']
+ if thumbnail.startswith('//'):
+ protocol, _, _ = url.partition('/')
+ thumbnail = protocol + thumbnail
+
+ formats = []
+ for key in ('video_url', 'video_alt_url', 'video_alt_url2'):
+ if key in flashvars and '/get_file/' in flashvars[key]:
+ next_format = {
+ 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
+ 'format_id': flashvars.get(key + '_text', key),
+ 'ext': 'mp4',
+ }
+ height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key])
+ if height:
+ next_format['height'] = int(height.group(1))
+ else:
+ next_format['quality'] = 1
+ formats.append(next_format)
+ self._sort_formats(formats)
+
+ return {
+ 'id': flashvars['video_id'],
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
if not found:
# Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py
deleted file mode 100644
index f507e1b067..0000000000
--- a/yt_dlp/extractor/thisvid.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-import re
-
-from .common import InfoExtractor
-
-
-class ThisVidIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?Pvideos|embed)/(?P[A-Za-z0-9-]+/?)'
- _TESTS = [{
- 'url': 'https://thisvid.com/videos/french-boy-pantsed/',
- 'md5': '3397979512c682f6b85b3b04989df224',
- 'info_dict': {
- 'id': '2400174',
- 'ext': 'mp4',
- 'title': 'French Boy Pantsed',
- 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
- 'age_limit': 18,
- }
- }, {
- 'url': 'https://thisvid.com/embed/2400174/',
- 'md5': '3397979512c682f6b85b3b04989df224',
- 'info_dict': {
- 'id': '2400174',
- 'ext': 'mp4',
- 'title': 'French Boy Pantsed',
- 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
- 'age_limit': 18,
- }
- }]
-
- def _real_extract(self, url):
- main_id = self._match_id(url)
- webpage = self._download_webpage(url, main_id)
-
- # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
- kvs_version = self._html_search_regex(r'