[generic] Support KVS player (#549)

* Replaces the extractor for thisvid

Fixes: https://github.com/ytdl-org/youtube-dl/issues/2077
Authored-by: rigstot
2021-07-29 06:03:01 +02:00 · 2021-07-29 06:03:01 +02:00 · a318f59d14
parent 7d1eb38af1
commit a318f59d14
3 changed files with 165 additions and 98 deletions
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@ -1338,7 +1338,6 @@ from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
 from .thisoldhouse import ThisOldHouseIE
 from .thisvid import ThisVidIE
 from .threeqsdn import ThreeQSDNIE
 from .tiktok import TikTokIE
 from .tinypic import TinyPicIE
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@ -2238,6 +2238,87 @@ class GenericIE(InfoExtractor):
                'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
            },
            'playlist_count': 1,
        }, {
            # KVS Player
            'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
            'info_dict': {
                'id': '105',
                'display_id': 'kelis-4th-of-july',
                'ext': 'mp4',
                'title': 'Kelis - 4th Of July',
                'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            # KVS Player
            'url': 'https://www.kvs-demo.com/embed/105/',
            'info_dict': {
                'id': '105',
                'display_id': 'kelis-4th-of-july',
                'ext': 'mp4',
                'title': 'Kelis - 4th Of July / Embed Player',
                'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            # KVS Player
            'url': 'https://thisvid.com/videos/french-boy-pantsed/',
            'md5': '3397979512c682f6b85b3b04989df224',
            'info_dict': {
                'id': '2400174',
                'display_id': 'french-boy-pantsed',
                'ext': 'mp4',
                'title': 'French Boy Pantsed - ThisVid.com',
                'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            }
        }, {
            # KVS Player
            'url': 'https://thisvid.com/embed/2400174/',
            'md5': '3397979512c682f6b85b3b04989df224',
            'info_dict': {
                'id': '2400174',
                'display_id': 'french-boy-pantsed',
                'ext': 'mp4',
                'title': 'French Boy Pantsed - ThisVid.com',
                'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            }
        }, {
            # KVS Player
            'url': 'https://youix.com/video/leningrad-zoj/',
            'md5': '94f96ba95706dc3880812b27b7d8a2b8',
            'info_dict': {
                'id': '18485',
                'display_id': 'leningrad-zoj',
                'ext': 'mp4',
                'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
                'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
            }
        }, {
            # KVS Player
            'url': 'https://youix.com/embed/18485',
            'md5': '94f96ba95706dc3880812b27b7d8a2b8',
            'info_dict': {
                'id': '18485',
                'display_id': 'leningrad-zoj',
                'ext': 'mp4',
                'title': 'Ленинград - ЗОЖ',
                'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
            }
        }, {
            # KVS Player
            'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
            'md5': '94166bdb26b4cb1fb9214319a629fc51',
            'info_dict': {
                'id': '21217',
                'display_id': '40-nochey-40-nights-2016',
                'ext': 'mp4',
                'title': '40 ночей (2016) - BogMedia.org',
                'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
            }
        },
    ]
@ -2343,6 +2424,44 @@ class GenericIE(InfoExtractor):
            'title': title,
        }
    def _kvs_getrealurl(self, video_url, license_code):
        if not video_url.startswith('function/0/'):
            return video_url  # not obfuscated
        url_path, _, url_query = video_url.partition('?')
        urlparts = url_path.split('/')[2:]
        license = self._kvs_getlicensetoken(license_code)
        newmagic = urlparts[5][:32]
        for o in range(len(newmagic) - 1, -1, -1):
            new = ''
            l = (o + sum([int(n) for n in license[o:]])) % 32
            for i in range(0, len(newmagic)):
                if i == o:
                    new += newmagic[l]
                elif i == l:
                    new += newmagic[o]
                else:
                    new += newmagic[i]
            newmagic = new
        urlparts[5] = newmagic + urlparts[5][32:]
        return '/'.join(urlparts) + '?' + url_query
    def _kvs_getlicensetoken(self, license):
        modlicense = license.replace('$', '').replace('0', '1')
        center = int(len(modlicense) / 2)
        fronthalf = int(modlicense[:center + 1])
        backhalf = int(modlicense[center:])
        modlicense = str(4 * abs(fronthalf - backhalf))
        retval = ''
        for o in range(0, center + 1):
            for i in range(1, 5):
                retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
        return retval
    def _real_extract(self, url):
        if url.startswith('//'):
            return self.url_result(self.http_scheme() + url)
@ -3478,6 +3597,52 @@ class GenericIE(InfoExtractor):
                )
                .*?
                ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
        if not found:
            # Look for generic KVS player
            found = re.search(r'<script [^>]*?src="https://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
            if found:
                if found.group('maj_ver') not in ['4', '5']:
                    self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
                flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
                flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
                # extract the part after the last / as the display_id from the
                # canonical URL.
                display_id = self._search_regex(
                    r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
                    r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
                    webpage, 'display_id', fatal=False
                )
                title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
                thumbnail = flashvars['preview_url']
                if thumbnail.startswith('//'):
                    protocol, _, _ = url.partition('/')
                    thumbnail = protocol + thumbnail
                formats = []
                for key in ('video_url', 'video_alt_url', 'video_alt_url2'):
                    if key in flashvars and '/get_file/' in flashvars[key]:
                        next_format = {
                            'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
                            'format_id': flashvars.get(key + '_text', key),
                            'ext': 'mp4',
                        }
                        height = re.search(r'%s_(\d+)p\.mp4(?:/[?].*)?$' % flashvars['video_id'], flashvars[key])
                        if height:
                            next_format['height'] = int(height.group(1))
                        else:
                            next_format['quality'] = 1
                        formats.append(next_format)
                self._sort_formats(formats)
                return {
                    'id': flashvars['video_id'],
                    'display_id': display_id,
                    'title': title,
                    'thumbnail': thumbnail,
                    'formats': formats,
                }
        if not found:
            # Broaden the search a little bit
            found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
--- a/yt_dlp/extractor/thisvid.py
+++ b/yt_dlp/extractor/thisvid.py
@ -1,97 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class ThisVidIE(InfoExtractor):
    # Extractor for thisvid.com "videos" and "embed" pages, which use the
    # KVS player with an obfuscated video URL.
    # The trailing slash is matched outside the id group so that the id never
    # carries it.
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)/?'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/french-boy-pantsed/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/2400174/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Download the page at ``url`` and build the info dict for its video."""
        main_id = self._match_id(url)
        webpage = self._download_webpage(url, main_id)

        # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
        kvs_version = self._html_search_regex(r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">', webpage, 'kvs_version', fatal=False)
        # The lookup is non-fatal, so guard against a missing value before
        # calling string methods on it.
        if not kvs_version or not kvs_version.startswith('5.'):
            self.report_warning(
                'Major version change (%s) in player engine--Download may fail.'
                % (kvs_version or 'unknown'))

        title = self._html_search_regex(r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>', webpage, 'title')

        # video_id, video_url and license_code from the 'flashvars' JSON object:
        video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
        video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
        license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')

        thumbnail = self._html_search_regex(r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',", webpage, 'thumbnail', fatal=False)
        # Non-fatal lookup as well: only fix up protocol-relative URLs when a
        # thumbnail was actually found.
        if thumbnail and thumbnail.startswith('//'):
            thumbnail = 'https:' + thumbnail

        if re.match(self._VALID_URL, url).group('type') == 'videos':
            display_id = main_id
        else:
            # Embed pages are addressed by numeric id; take the slug from the
            # canonical link instead. _VALID_URL has several groups, so ask
            # for the 'id' group explicitly.
            display_id = self._search_regex(
                r'<link rel="canonical" href="' + self._VALID_URL + r'">',
                webpage, 'display_id', fatal=False, group='id')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'url': getrealurl(video_url, license_code),
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
 def getrealurl(video_url, license_code):
    """Unscramble an obfuscated KVS ``video_url`` using ``license_code``.

    The first two ``/``-separated components of ``video_url`` are dropped.
    The first 32 characters of the sixth remaining component are then
    permuted by swaps whose targets come from the license token, and the
    components are joined back with ``/``.
    """
    parts = video_url.split('/')[2:]
    token = getlicensetoken(license_code)
    scrambled = parts[5]
    chars = list(scrambled[:32])
    # Process positions from last to first; each one is swapped with the
    # position obtained from the sum of the token digits from there onwards.
    for pos in reversed(range(len(chars))):
        other = (pos + sum(int(digit) for digit in token[pos:])) % 32
        chars[pos], chars[other] = chars[other], chars[pos]
    parts[5] = ''.join(chars) + scrambled[32:]
    return '/'.join(parts)
 def getlicensetoken(license):
    """Derive the digit string used to unscramble KVS media URLs.

    The license code, with ``$`` removed and every ``0`` replaced by ``1``,
    is split into two overlapping halves; four times their absolute
    difference supplies one "modifier" digit per step. Each step emits four
    digits: the characters of the original ``license`` at offsets 1..4 from
    the step index, each added to the modifier digit modulo 10.

    :param license: license code string (e.g. ``$`` followed by digits).
    :return: a string of decimal digits, four per modifier digit used.
    """
    modlicense = license.replace('$', '').replace('0', '1')
    center = len(modlicense) // 2
    fronthalf = int(modlicense[:center + 1])
    backhalf = int(modlicense[center:])
    # At most center + 1 modifier digits are used. The product can have fewer
    # digits than that (e.g. when both halves are equal), so iterate over the
    # digits that exist instead of indexing past the end.
    modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1]
    return ''.join(
        str((int(license[o + i]) + int(digit)) % 10)
        for o, digit in enumerate(modlicense)
        for i in range(1, 5))