From 8b0dc0946c66ff89d9035ee2e750004037e06eb6 Mon Sep 17 00:00:00 2001 From: DrakoCpp <160542400+DrakoCpp@users.noreply.github.com> Date: Mon, 19 Feb 2024 20:47:49 +0000 Subject: [PATCH 1/2] [murrtube] Fix extractor (#7500) --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/murrtube.py | 204 ++++++++++---------------------- 2 files changed, 61 insertions(+), 145 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fc22e1571..4cfa5b442 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1143,7 +1143,7 @@ from .mtv import ( MTVItaliaProgrammaIE, ) from .muenchentv import MuenchenTVIE -from .murrtube import MurrtubeIE, MurrtubeUserIE +from .murrtube import MurrtubeIE from .museai import MuseAIIE from .musescore import MuseScoreIE from .musicdex import ( diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py index 6cdbbda16..cb63c8f97 100644 --- a/yt_dlp/extractor/murrtube.py +++ b/yt_dlp/extractor/murrtube.py @@ -1,13 +1,8 @@ -import functools -import json - +import re from .common import InfoExtractor from ..utils import ( - ExtractorError, - OnDemandPagedList, - determine_ext, - int_or_none, - try_get, + urlencode_postdata, + extract_attributes ) @@ -15,148 +10,69 @@ class MurrtubeIE(InfoExtractor): _VALID_URL = r'''(?x) (?: murrtube:| - https?://murrtube\.net/videos/(?P[a-z0-9\-]+)\- + https?://murrtube\.net/v/| + https?://murrtube\.net/videos/(?P[a-z0-9\-]+?)\- ) - (?P[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12}) + (?P[A-Z0-9]{4}|[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12}) ''' - _TEST = { - 'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0', - 'md5': '169f494812d9a90914b42978e73aa690', - 'info_dict': { - 'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0', - 'ext': 'mp4', - 'title': 'Inferno X Skyler', - 'description': 'Humping a very good slutty sheppy (roomate)', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 284, - 'uploader': 'Inferno Wolf', - 'age_limit': 18, - 'comment_count': int, - 'view_count': int, - 'like_count': int, - 'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'], - } - } - def _download_gql(self, video_id, op, note=None, fatal=True): - result = self._download_json( - 'https://murrtube.net/graphql', - video_id, note, data=json.dumps(op).encode(), fatal=fatal, - headers={'Content-Type': 'application/json'}) - return result['data'] + _TESTS = [ + { + "url": "https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0", + "md5": "169f494812d9a90914b42978e73aa690", + "info_dict": { + "id": "ca885d8456b95de529b6723b158032e11115d", + "ext": "mp4", + "title": "Inferno X Skyler", + "description": "Humping a very good slutty sheppy (roomate)", + "uploader": "Inferno Wolf", + "age_limit": 18, + }, + }, + { + "url": "https://murrtube.net/v/0J2Q", + "md5": "757e53c0795a03d53bb4ca243f851aba", + "info_dict": { + "id": "8442998c52134968d9caa36e473e1a6bac6ca", + "uploader": "Hayel", + "title": "Who's in charge now?", + "description": """Fenny sneaked into my bed room and played naughty with one of my plushies. I caught him in the act and wanted to punish him. He thought he was in charge and wanted to use me instead but he wasn't prepared on my butt milking him within just a minute. + +Fenny: @fenny_ad (both here and on Twitter) +Hayel on Twitter: https://twitter.com/plushmods""", + "age_limit": 18, + } + } + ] def _real_extract(self, url): - video_id = self._match_id(url) - data = self._download_gql(video_id, { - 'operationName': 'Medium', - 'variables': { - 'id': video_id, - }, - 'query': '''\ -query Medium($id: ID!) { - medium(id: $id) { - title - description - key - duration - commentsCount - likesCount - viewsCount - thumbnailKey - tagList - user { - name - __typename - } - __typename - } -}'''}) - meta = data['medium'] - - storage_url = 'https://storage.murrtube.net/murrtube/' - format_url = storage_url + meta.get('key', '') - thumbnail = storage_url + meta.get('thumbnailKey', '') - - if determine_ext(format_url) == 'm3u8': - formats = self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False) - else: - formats = [{'url': format_url}] - + video_id = self._match_valid_url(url) + # TODO: This part could be smarter (Set and store age cookie?) + video_page = self._download_webpage( + 'https://murrtube.net', None, note='Getting session token') + data = self._hidden_inputs(video_page) + self._download_webpage( + 'https://murrtube.net/accept_age_check', None, 'Set age cookie', data=urlencode_postdata(data)) + video_page = self._download_webpage(url, None) + video_attrs = extract_attributes(self._search_regex(r'(]+>)', video_page, 'video')) + playlist = video_attrs['data-url'].split('?')[0] + matches = re.compile(r'https://storage.murrtube.net/murrtube-production/.+/(?P.+)/index.m3u8').match(playlist).groupdict() + video_id = matches['id'] + formats = self._extract_m3u8_formats(playlist, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False) + title = self._html_search_meta( + 'og:title', video_page, display_name='title', fatal=True)[:-11] + description = self._html_search_meta( + 'og:description', video_page, display_name='description', fatal=True) + thumbnail = self._html_search_meta( + 'og:image', video_page, display_name='thumbnail', fatal=True) + uploader = self._html_search_regex( + r'(.+?)', video_page, 'uploader', default=None) return { 'id': video_id, - 'title': meta.get('title'), - 'description': meta.get('description'), - 'formats': formats, - 'thumbnail': thumbnail, - 'duration': int_or_none(meta.get('duration')), - 'uploader': try_get(meta, lambda x: x['user']['name']), - 'view_count': meta.get('viewsCount'), - 'like_count': meta.get('likesCount'), - 'comment_count': meta.get('commentsCount'), - 'tags': meta.get('tagList'), + 'title': title, 'age_limit': 18, + 'formats': formats, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, } - - -class MurrtubeUserIE(MurrtubeIE): # XXX: Do not subclass from concrete IE - IE_DESC = 'Murrtube user profile' - _VALID_URL = r'https?://murrtube\.net/(?P[^/]+)$' - _TEST = { - 'url': 'https://murrtube.net/stormy', - 'info_dict': { - 'id': 'stormy', - }, - 'playlist_mincount': 27, - } - _PAGE_SIZE = 10 - - def _fetch_page(self, username, user_id, page): - data = self._download_gql(username, { - 'operationName': 'Media', - 'variables': { - 'limit': self._PAGE_SIZE, - 'offset': page * self._PAGE_SIZE, - 'sort': 'latest', - 'userId': user_id, - }, - 'query': '''\ -query Media($q: String, $sort: String, $userId: ID, $offset: Int!, $limit: Int!) { - media(q: $q, sort: $sort, userId: $userId, offset: $offset, limit: $limit) { - id - __typename - } -}'''}, - 'Downloading page {0}'.format(page + 1)) - if data is None: - raise ExtractorError(f'Failed to retrieve video list for page {page + 1}') - - media = data['media'] - - for entry in media: - yield self.url_result('murrtube:{0}'.format(entry['id']), MurrtubeIE.ie_key()) - - def _real_extract(self, url): - username = self._match_id(url) - data = self._download_gql(username, { - 'operationName': 'User', - 'variables': { - 'id': username, - }, - 'query': '''\ -query User($id: ID!) { - user(id: $id) { - id - __typename - } -}'''}, - 'Downloading user info') - if data is None: - raise ExtractorError('Failed to fetch user info') - - user = data['user'] - - entries = OnDemandPagedList(functools.partial( - self._fetch_page, username, user.get('id')), self._PAGE_SIZE) - - return self.playlist_result(entries, username) From b8e336b6e3d4a125d363116e2bc6cb02cad97856 Mon Sep 17 00:00:00 2001 From: DrakoCpp <160542400+DrakoCpp@users.noreply.github.com> Date: Mon, 19 Feb 2024 21:17:26 +0000 Subject: [PATCH 2/2] [murrtube] Fix tests --- yt_dlp/extractor/murrtube.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py index cb63c8f97..d0178daac 100644 --- a/yt_dlp/extractor/murrtube.py +++ b/yt_dlp/extractor/murrtube.py @@ -19,7 +19,7 @@ class MurrtubeIE(InfoExtractor): _TESTS = [ { "url": "https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0", - "md5": "169f494812d9a90914b42978e73aa690", + "md5": "70380878a77e8565d4aea7f68b8bbb35", "info_dict": { "id": "ca885d8456b95de529b6723b158032e11115d", "ext": "mp4", @@ -27,20 +27,20 @@ class MurrtubeIE(InfoExtractor): "description": "Humping a very good slutty sheppy (roomate)", "uploader": "Inferno Wolf", "age_limit": 18, + "thumbnail": "https://storage.murrtube.net/murrtube-production/ekbs3zcfvuynnqfx72nn2tkokvsd" }, }, { "url": "https://murrtube.net/v/0J2Q", - "md5": "757e53c0795a03d53bb4ca243f851aba", + "md5": "31262f6ac56f0ca75e5a54a0f3fefcb6", "info_dict": { "id": "8442998c52134968d9caa36e473e1a6bac6ca", + "ext": "mp4", "uploader": "Hayel", - "title": "Who's in charge now?", - "description": """Fenny sneaked into my bed room and played naughty with one of my plushies. I caught him in the act and wanted to punish him. He thought he was in charge and wanted to use me instead but he wasn't prepared on my butt milking him within just a minute. - -Fenny: @fenny_ad (both here and on Twitter) -Hayel on Twitter: https://twitter.com/plushmods""", + "title": "Who's in charge now?", + "description": """Fenny sneaked into my bed room and played naughty with one of my plushies. I caught him in the act and wanted to punish him. He thought he was in charge and wanted to use me instead but he wasn't prepared on my butt milking him within just a minute. Fenny: @fenny_ad (both here and on Twitter) Hayel on Twitter: https://twitter.com/plushmods""", "age_limit": 18, + "thumbnail": "https://storage.murrtube.net/murrtube-production/fb1ojjwiucufp34ya6hxu5vfqi5s" } } ] @@ -64,7 +64,7 @@ Hayel on Twitter: https://twitter.com/plushmods""", description = self._html_search_meta( 'og:description', video_page, display_name='description', fatal=True) thumbnail = self._html_search_meta( - 'og:image', video_page, display_name='thumbnail', fatal=True) + 'og:image', video_page, display_name='thumbnail', fatal=True).split("?")[0] uploader = self._html_search_regex( r'(.+?)', video_page, 'uploader', default=None) return {