diff --git a/test/test_utils.py b/test/test_utils.py index dedc598f7..d20bca795 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -62,6 +62,7 @@ from yt_dlp.utils import ( parse_iso8601, parse_resolution, parse_bitrate, + parse_qs, pkcs1pad, read_batch_urls, sanitize_filename, @@ -117,8 +118,6 @@ from yt_dlp.compat import ( compat_getenv, compat_os_name, compat_setenv, - compat_urlparse, - compat_parse_qs, ) @@ -688,38 +687,36 @@ class TestUtil(unittest.TestCase): self.assertTrue(isinstance(data, bytes)) def test_update_url_query(self): - def query_dict(url): - return compat_parse_qs(compat_urlparse.urlparse(url).query) - self.assertEqual(query_dict(update_url_query( + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})), - query_dict('http://example.com/path?quality=HD&format=mp4')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?quality=HD&format=mp4')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})), - query_dict('http://example.com/path?system=LINUX&system=WINDOWS')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?system=LINUX&system=WINDOWS')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': 'id,formats,subtitles'})), - query_dict('http://example.com/path?fields=id,formats,subtitles')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})), - query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?manifest=f4m', {'manifest': []})), - query_dict('http://example.com/path')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})), - query_dict('http://example.com/path?system=LINUX')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?system=LINUX')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'fields': b'id,formats,subtitles'})), - query_dict('http://example.com/path?fields=id,formats,subtitles')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?fields=id,formats,subtitles')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'width': 1080, 'height': 720})), - query_dict('http://example.com/path?width=1080&height=720')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?width=1080&height=720')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'bitrate': 5020.43})), - query_dict('http://example.com/path?bitrate=5020.43')) - self.assertEqual(query_dict(update_url_query( + parse_qs('http://example.com/path?bitrate=5020.43')) + self.assertEqual(parse_qs(update_url_query( 'http://example.com/path', {'test': '第二行тест'})), - query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) + parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82')) def test_multipart_encode(self): self.assertEqual( diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 133b5e254..4766a2c77 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -4,13 +4,10 @@ from __future__ import unicode_literals import re from .yahoo import YahooIE -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( ExtractorError, int_or_none, + parse_qs, url_or_none, ) @@ -119,7 +116,7 @@ class AolIE(YahooIE): 'height': int(mobj.group(2)), }) else: - qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) + qs = parse_qs(video_url) f.update({ 'width': int_or_none(qs.get('w', [None])[0]), 'height': int_or_none(qs.get('h', [None])[0]), diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index db685ff42..d90fcb13a 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -9,8 +9,6 @@ from .youtube import YoutubeIE from ..compat import ( compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, - compat_urlparse, - compat_parse_qs, compat_HTTPError ) from ..utils import ( @@ -25,6 +23,7 @@ from ..utils import ( merge_dicts, mimetype2ext, parse_duration, + parse_qs, RegexNotFoundError, str_to_int, str_or_none, @@ -399,7 +398,7 @@ class YoutubeWebArchiveIE(InfoExtractor): expected=True) raise video_file_url = compat_urllib_parse_unquote(video_file_webpage.url) - video_file_url_qs = compat_parse_qs(compat_urlparse.urlparse(video_file_url).query) + video_file_url_qs = parse_qs(video_file_url) # Attempt to recover any ext & format info from playback url format = {'url': video_file_url} diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index 7b1886141..4f4f457c1 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -4,12 +4,12 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, float_or_none, int_or_none, parse_iso8601, + parse_qs, try_get, ) @@ -69,7 +69,7 @@ class ArkenaIE(InfoExtractor): # Handle http://video.arkena.com/play2/embed/player URL if not video_id: - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) video_id = qs.get('mediaId', [None])[0] account_id = qs.get('accountId', [None])[0] if not video_id or not account_id: diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index c163db9c9..ed245b75f 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -6,11 +6,11 @@ import re from .common import InfoExtractor from ..compat import ( compat_str, - compat_urlparse, ) from ..utils import ( ExtractorError, int_or_none, + parse_qs, qualities, try_get, unified_strdate, @@ -204,7 +204,7 @@ class ArteTVEmbedIE(InfoExtractor): webpage)] def _real_extract(self, url): - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) json_url = qs['json_url'][0] video_id = ArteTVIE._match_id(json_url) return self.url_result( diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 09b2932d2..de497ab1d 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -10,9 +10,7 @@ from .common import InfoExtractor from ..compat import ( compat_etree_Element, compat_HTTPError, - compat_parse_qs, compat_str, - compat_urllib_parse_urlparse, compat_urlparse, ) from ..utils import ( @@ -26,6 +24,7 @@ from ..utils import ( js_to_json, parse_duration, parse_iso8601, + parse_qs, strip_or_none, try_get, unescapeHTML, @@ -1410,7 +1409,7 @@ class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor): def _real_extract(self, url): pid = self._match_id(url) - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs = parse_qs(url) series_id = qs.get('seriesId', [None])[0] page = qs.get('page', [None])[0] per_page = 36 if page else self._PAGE_SIZE diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 5788d13ba..8fbabe708 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -3,10 +3,10 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_str, - compat_urlparse, ) from ..utils import ( int_or_none, + parse_qs, unified_timestamp, ) @@ -57,7 +57,7 @@ class BeegIE(InfoExtractor): query = { 'v': 2, } - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) t = qs.get('t', [''])[0].split('-') if len(t) > 1: query.update({ diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 31606d3bd..f3d955d6b 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -11,7 +11,6 @@ from ..compat import ( compat_etree_fromstring, compat_HTTPError, compat_parse_qs, - compat_urllib_parse_urlparse, compat_urlparse, compat_xml_parse_error, ) @@ -26,6 +25,7 @@ from ..utils import ( js_to_json, mimetype2ext, parse_iso8601, + parse_qs, smuggle_url, str_or_none, try_get, @@ -177,7 +177,7 @@ class BrightcoveLegacyIE(InfoExtractor): flashvars = {} data_url = object_doc.attrib.get('data', '') - data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query) + data_url_params = parse_qs(data_url) def find_param(name): if name in flashvars: diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py index da404e4dc..349c5eb50 100644 --- a/yt_dlp/extractor/ciscolive.py +++ b/yt_dlp/extractor/ciscolive.py @@ -4,14 +4,11 @@ from __future__ import unicode_literals import itertools from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( clean_html, float_or_none, int_or_none, + parse_qs, try_get, urlencode_postdata, ) @@ -145,7 +142,7 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): query['from'] += query['size'] def _real_extract(self, url): - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + query = parse_qs(url) query['type'] = 'session' return self.playlist_result( self._entries(query, url), playlist_title='Search query') diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py index 06d04de13..e6b2ac4d4 100644 --- a/yt_dlp/extractor/clyp.py +++ b/yt_dlp/extractor/clyp.py @@ -1,12 +1,9 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( float_or_none, + parse_qs, unified_timestamp, ) @@ -44,7 +41,7 @@ class ClypIE(InfoExtractor): def _real_extract(self, url): audio_id = self._match_id(url) - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs = parse_qs(url) token = qs.get('token', [None])[0] query = {} diff --git a/yt_dlp/extractor/daum.py b/yt_dlp/extractor/daum.py index b0911cf94..8aa2af9a8 100644 --- a/yt_dlp/extractor/daum.py +++ b/yt_dlp/extractor/daum.py @@ -6,10 +6,9 @@ import itertools from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_urllib_parse_unquote, - compat_urlparse, ) +from ..utils import parse_qs class DaumBaseIE(InfoExtractor): @@ -155,7 +154,7 @@ class DaumListIE(InfoExtractor): return name, entries def _check_clip(self, url, list_id): - query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query) + query_dict = parse_qs(url) if 'clipid' in query_dict: clip_id = query_dict['clipid'][0] if self.get_param('noplaylist'): @@ -256,7 +255,7 @@ class DaumUserIE(DaumListIE): if clip_result: return clip_result - query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query) + query_dict = parse_qs(url) if 'playlistid' in query_dict: playlist_id = query_dict['playlistid'][0] return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist') diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index 2c1c747a1..60ab2ce13 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( int_or_none, orderedSet, parse_duration, + parse_qs, qualities, unified_strdate, xpath_text @@ -53,7 +53,7 @@ class EuropaIE(InfoExtractor): if items.get(p): return items[p] - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + query = parse_qs(url) preferred_lang = query.get('sitelang', ('en', ))[0] preferred_langs = orderedSet((preferred_lang, 'en', 'int')) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 202141497..41910cefb 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -6,7 +6,6 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_str, - compat_urlparse, ) from ..utils import ( clean_html, @@ -14,6 +13,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, + parse_qs, try_get, url_or_none, urljoin, @@ -226,7 +226,7 @@ class FranceTVIE(InfoExtractor): catalog = mobj.group('catalog') if not video_id: - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) video_id = qs.get('idDiffusion', [None])[0] catalog = qs.get('catalogue', [None])[0] if not video_id: diff --git a/yt_dlp/extractor/internetvideoarchive.py b/yt_dlp/extractor/internetvideoarchive.py index 59b0a90c3..880918cd7 100644 --- a/yt_dlp/extractor/internetvideoarchive.py +++ b/yt_dlp/extractor/internetvideoarchive.py @@ -4,10 +4,7 @@ import json import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) +from ..utils import parse_qs class InternetVideoArchiveIE(InfoExtractor): @@ -32,7 +29,7 @@ class InternetVideoArchiveIE(InfoExtractor): return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query def _real_extract(self, url): - query = compat_parse_qs(compat_urlparse.urlparse(url).query) + query = parse_qs(url) video_id = query['publishedid'][0] data = self._download_json( 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx', diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index cdfbefcd4..4289c51b8 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -6,16 +6,15 @@ import json from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_str, compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, ) from ..utils import ( determine_ext, ExtractorError, int_or_none, mimetype2ext, + parse_qs, OnDemandPagedList, try_get, urljoin, @@ -256,7 +255,7 @@ class LBRYChannelIE(LBRYBaseIE): result = self._resolve_url( 'lbry://' + display_id, display_id, 'channel') claim_id = result['claim_id'] - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs = parse_qs(url) content = qs.get('content', [None])[0] params = { 'fee_amount': qs.get('fee_amount', ['>=0'])[0], diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index 491e716bd..d8f12dca6 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -4,13 +4,10 @@ from __future__ import unicode_literals import re from .theplatform import ThePlatformBaseIE -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( ExtractorError, int_or_none, + parse_qs, update_url_query, ) @@ -96,7 +93,7 @@ class MediasetIE(ThePlatformBaseIE): @staticmethod def _extract_urls(ie, webpage): def _qs(url): - return compat_parse_qs(compat_urllib_parse_urlparse(url).query) + return parse_qs(url) def _program_guid(qs): return qs.get('programGuid', [None])[0] diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 3c678c50d..7390ef8bc 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -5,10 +5,8 @@ import re from .turner import TurnerBaseIE from ..compat import ( - compat_parse_qs, compat_str, compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, ) from ..utils import ( int_or_none, @@ -16,6 +14,7 @@ from ..utils import ( OnDemandPagedList, parse_duration, parse_iso8601, + parse_qs, try_get, update_url_query, urljoin, @@ -165,7 +164,7 @@ class NBAWatchIE(NBAWatchBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0] + collection_id = parse_qs(url).get('collection', [None])[0] if collection_id: if self.get_param('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % display_id) @@ -359,7 +358,7 @@ class NBAEmbedIE(NBABaseIE): }] def _real_extract(self, url): - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs = parse_qs(url) content_id = qs['contentId'][0] team = qs.get('team', [None])[0] if not team: diff --git a/yt_dlp/extractor/noco.py b/yt_dlp/extractor/noco.py index aec8433de..78c4952f4 100644 --- a/yt_dlp/extractor/noco.py +++ b/yt_dlp/extractor/noco.py @@ -8,7 +8,6 @@ import hashlib from .common import InfoExtractor from ..compat import ( compat_str, - compat_urlparse, ) from ..utils import ( clean_html, @@ -16,6 +15,7 @@ from ..utils import ( int_or_none, float_or_none, parse_iso8601, + parse_qs, sanitized_Request, urlencode_postdata, ) @@ -123,7 +123,7 @@ class NocoIE(InfoExtractor): webpage, 'noco player', group='player', default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf') - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query) + qs = parse_qs(player_url) ts = int_or_none(qs.get('ts', [None])[0]) self._ts_offset = ts - self._ts() if ts else 0 self._referer = player_url diff --git a/yt_dlp/extractor/pandoratv.py b/yt_dlp/extractor/pandoratv.py index 44b462beb..623005338 100644 --- a/yt_dlp/extractor/pandoratv.py +++ b/yt_dlp/extractor/pandoratv.py @@ -5,12 +5,12 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_str, - compat_urlparse, ) from ..utils import ( ExtractorError, float_or_none, parse_duration, + parse_qs, str_to_int, urlencode_postdata, ) @@ -75,7 +75,7 @@ class PandoraTVIE(InfoExtractor): video_id = mobj.group('id') if not user_id or not video_id: - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) video_id = qs.get('prgid', [None])[0] user_id = qs.get('ch_userid', [None])[0] if any(not f for f in (video_id, user_id,)): diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index e86c65396..dc2030017 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -4,11 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( determine_ext, ExtractorError, int_or_none, + parse_qs, xpath_text, qualities, ) @@ -56,7 +56,7 @@ class PladformIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) pl = qs.get('pl', ['1'])[0] video = self._download_xml( diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index d494753e6..801057ee1 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -17,6 +17,7 @@ from ..utils import ( float_or_none, int_or_none, parse_duration, + parse_qs, qualities, srt_subtitles_timecode, try_get, @@ -273,7 +274,7 @@ query viewClip { return srt def _real_extract(self, url): - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) author = qs.get('author', [None])[0] name = qs.get('name', [None])[0] diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 8f54d5675..01529315f 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -7,13 +7,12 @@ import itertools from .common import InfoExtractor from ..compat import ( compat_str, - compat_parse_qs, - compat_urllib_parse_urlparse, ) from ..utils import ( determine_ext, bool_or_none, int_or_none, + parse_qs, try_get, unified_timestamp, url_or_none, @@ -178,7 +177,7 @@ class RutubeEmbedIE(RutubeBaseIE): embed_id = self._match_id(url) # Query may contain private videos token and should be passed to API # requests (see #19163) - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + query = parse_qs(url) options = self._download_api_options(embed_id, query) video_id = options['effective_video'] formats = self._extract_formats(options, video_id) @@ -300,14 +299,14 @@ class RutubePlaylistIE(RutubePlaylistBaseIE): def suitable(cls, url): if not super(RutubePlaylistIE, cls).suitable(url): return False - params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + params = parse_qs(url) return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) def _next_page_url(self, page_num, playlist_id, item_kind): return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) def _real_extract(self, url): - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs = parse_qs(url) playlist_kind = qs['pl_type'][0] playlist_id = qs['pl_id'][0] return self._extract_playlist(playlist_id, item_kind=playlist_kind) diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py index 7a1c7e38b..eef4975cb 100644 --- a/yt_dlp/extractor/seznamzpravy.py +++ b/yt_dlp/extractor/seznamzpravy.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_str, compat_urllib_parse_urlparse, ) @@ -13,6 +12,7 @@ from ..utils import ( urljoin, int_or_none, parse_codecs, + parse_qs, try_get, ) @@ -108,7 +108,7 @@ class SeznamZpravyIE(InfoExtractor): return formats def _real_extract(self, url): - params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + params = parse_qs(url) src = params['src'][0] title = params['title'][0] diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py index 3a3a99256..fd747f59b 100644 --- a/yt_dlp/extractor/sixplay.py +++ b/yt_dlp/extractor/sixplay.py @@ -4,13 +4,12 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_str, - compat_urllib_parse_urlparse, ) from ..utils import ( determine_ext, int_or_none, + parse_qs, try_get, qualities, ) @@ -78,7 +77,7 @@ class SixPlayIE(InfoExtractor): continue if container == 'm3u8' or ext == 'm3u8': if protocol == 'usp': - if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]: + if parse_qs(asset_url).get('token', [None])[0]: urlh = self._request_webpage( asset_url, video_id, fatal=False, headers=self.geo_verification_headers()) diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 2129a5670..94bcaba44 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -2,15 +2,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( clean_html, float_or_none, int_or_none, parse_iso8601, + parse_qs, strip_or_none, try_get, ) @@ -61,7 +58,7 @@ class SportDeutschlandIE(InfoExtractor): } videos = asset.get('videos') or [] if len(videos) > 1: - playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0] + playlist_id = parse_qs(url).get('playlistId', [None])[0] if playlist_id: if self.get_param('noplaylist'): videos = [videos[int(playlist_id)]] diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index 3e1a7a9e6..0d9cf75ca 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -6,9 +6,9 @@ import re from .common import InfoExtractor from .jwplatform import JWPlatformIE from .nexx import NexxIE -from ..compat import compat_urlparse from ..utils import ( NO_DEFAULT, + parse_qs, smuggle_url, ) @@ -64,7 +64,7 @@ class Tele5IE(InfoExtractor): }] def _real_extract(self, url): - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0] NEXX_ID_RE = r'\d{6,}' diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index c56b708b8..c2729f12d 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -10,15 +10,12 @@ import hashlib from .once import OnceIE from .adobepass import AdobePassIE -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( determine_ext, ExtractorError, float_or_none, int_or_none, + parse_qs, sanitized_Request, unsmuggle_url, update_url_query, @@ -250,7 +247,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): path += mobj.group('media') path += video_id - qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs_dict = parse_qs(url) if 'guid' in qs_dict: webpage = self._download_webpage(url, video_id) scripts = re.findall(r']+src="([^"]+)"', webpage) @@ -359,7 +356,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): if first_video_id is None: first_video_id = cur_video_id duration = float_or_none(item.get('plfile$duration')) - file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes'] + file_asset_types = item.get('plfile$assetTypes') or parse_qs(smil_url)['assetTypes'] for asset_type in file_asset_types: if asset_type in asset_types: continue diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index e544e4737..be70beed4 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -11,7 +11,6 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_str, - compat_urlparse, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) @@ -23,6 +22,7 @@ from ..utils import ( int_or_none, parse_duration, parse_iso8601, + parse_qs, qualities, try_get, unified_timestamp, @@ -571,7 +571,7 @@ class TwitchVideosIE(TwitchPlaylistBaseIE): def _real_extract(self, url): channel_name = self._match_id(url) - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) filter = qs.get('filter', ['all'])[0] sort = qs.get('sort', ['time'])[0] broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST) @@ -647,7 +647,7 @@ class TwitchVideosClipsIE(TwitchPlaylistBaseIE): def _real_extract(self, url): channel_name = self._match_id(url) - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) range = qs.get('range', ['7d'])[0] clip = self._RANGE.get(range, self._DEFAULT_CLIP) return self.playlist_result( diff --git a/yt_dlp/extractor/varzesh3.py b/yt_dlp/extractor/varzesh3.py index f474ed73f..81313dc9d 100644 --- a/yt_dlp/extractor/varzesh3.py +++ b/yt_dlp/extractor/varzesh3.py @@ -2,12 +2,9 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_parse_qs, -) from ..utils import ( clean_html, + parse_qs, remove_start, ) @@ -59,7 +56,7 @@ class Varzesh3IE(InfoExtractor): fb_sharer_url = self._search_regex( r']+href="(https?://www\.facebook\.com/sharer/sharer\.php?[^"]+)"', webpage, 'facebook sharer URL', fatal=False) - sharer_params = compat_parse_qs(compat_urllib_parse_urlparse(fb_sharer_url).query) + sharer_params = parse_qs(fb_sharer_url) thumbnail = sharer_params.get('p[images][0]', [None])[0] video_id = self._search_regex( diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index 142ac8dc2..8a0f29259 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -6,13 +6,13 @@ import json from .common import InfoExtractor from ..compat import ( compat_str, - compat_urlparse, compat_HTTPError, ) from ..utils import ( ExtractorError, int_or_none, parse_iso8601, + parse_qs, ) @@ -218,7 +218,7 @@ class VevoPlaylistIE(VevoBaseIE): webpage = self._download_webpage(url, playlist_id) - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + qs = parse_qs(url) index = qs.get('index', [None])[0] if index: diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index f7c24d259..da0212bb2 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, mimetype2ext, parse_codecs, + parse_qs, update_url_query, urljoin, xpath_element, @@ -20,7 +21,6 @@ from ..compat import ( compat_b64decode, compat_ord, compat_struct_pack, - compat_urlparse, ) @@ -113,7 +113,7 @@ class VideaIE(InfoExtractor): for i in range(0, 32): result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query) + query = parse_qs(player_url) random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) query['_s'] = random_seed query['_t'] = result[:16] diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py index baafdc15d..17ef3b1b9 100644 --- a/yt_dlp/extractor/videomore.py +++ b/yt_dlp/extractor/videomore.py @@ -5,12 +5,11 @@ import re from .common import InfoExtractor from ..compat import ( - compat_parse_qs, compat_str, - compat_urllib_parse_urlparse, ) from ..utils import ( int_or_none, + parse_qs, ) @@ -146,7 +145,7 @@ class VideomoreIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('sid') or mobj.group('id') - partner_id = mobj.group('partner_id') or compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('partner_id', [None])[0] or '97' + partner_id = mobj.group('partner_id') or parse_qs(url).get('partner_id', [None])[0] or '97' item = self._download_json( 'https://siren.more.tv/player/config', video_id, query={ diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 5c09c8520..8b367a4e6 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -25,6 +25,7 @@ from ..utils import ( OnDemandPagedList, parse_filesize, parse_iso8601, + parse_qs, RegexNotFoundError, sanitized_Request, smuggle_url, @@ -265,7 +266,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): download_url = download_data.get('link') if not download_url or download_data.get('quality') != 'source': continue - query = compat_urlparse.parse_qs(compat_urlparse.urlparse(download_url).query) + query = parse_qs(download_url) return { 'url': download_url, 'ext': determine_ext(query.get('filename', [''])[0].lower()), diff --git a/yt_dlp/extractor/xboxclips.py b/yt_dlp/extractor/xboxclips.py index 25f487e1e..9bac982f8 100644 --- a/yt_dlp/extractor/xboxclips.py +++ b/yt_dlp/extractor/xboxclips.py @@ -4,14 +4,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( int_or_none, month_by_abbreviation, parse_filesize, + parse_qs, ) @@ -37,7 +34,7 @@ class XboxClipsIE(InfoExtractor): video_id = self._match_id(url) if '/video.php' in url: - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs = parse_qs(url) url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0]) webpage = self._download_webpage(url, video_id) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 5bce53349..15e0f8adb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -46,6 +46,7 @@ from ..utils import ( parse_count, parse_duration, parse_iso8601, + parse_qs, qualities, remove_start, smuggle_url, @@ -64,10 +65,6 @@ from ..utils import ( ) -def parse_qs(url): - return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - - # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -1842,7 +1839,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def suitable(cls, url): # Hack for lazy extractors until more generic solution is implemented # (see #28780) - from .youtube import parse_qs + from ..utils import parse_qs + qs = parse_qs(url) if qs.get('list', [None])[0]: return False @@ -4598,7 +4596,7 @@ class YoutubeSearchURLIE(YoutubeSearchIE): return cls._VALID_URL def _real_extract(self, url): - qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + qs = parse_qs(url) query = (qs.get('search_query') or qs.get('q'))[0] self._SEARCH_PARAMS = qs.get('sp', ('',))[0] return self._get_n_results(query, self._MAX_RESULTS) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 6276ac726..c07a17099 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4167,6 +4167,10 @@ def escape_url(url): ).geturl() +def parse_qs(url): + return compat_parse_qs(compat_urllib_parse_urlparse(url).query) + + def read_batch_urls(batch_fd): def fixup(url): if not isinstance(url, compat_str):