Use multiple regexes to match URLs

I can't live without regex101
This commit is contained in:
Mozi 2024-08-31 05:06:20 +00:00
parent c61db5e643
commit 84b90ec378
1 changed file with 21 additions and 11 deletions

View File

@ -99,19 +99,26 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'''(?ix)
_VALID_URL_PREFIX = r'''(?ix)
https?://
(?:
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
(?:www\.)?lequipe\.fr
)/
'''
_VALID_URL = [
rf'''{_VALID_URL_PREFIX}
(?:
video/|
swf(?:/(?!video)|/video/)|
player(?:/\w+)?\.html\?(?:video|playlist)=
)
(?P<id>[^/?_&#]+)
'''
swf(?:/(?!video)|/video/)
)(?P<id>[^/?_&#]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
''',
rf'''{_VALID_URL_PREFIX}
(?:
player(?:/\w+)?\.html\?
)(?:video[=/](?P<id>[^/?_&#]+))?(?:.*?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
''',
]
IE_NAME = 'dailymotion'
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
_TESTS = [{
@ -225,6 +232,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, {
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
'only_matching': True,
}, {
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
@ -285,13 +295,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
video_id = self._match_id(url)
video_id, playlist_id = self._match_valid_url(url).groups()
if 'playlist=' in url:
if self._yes_playlist(video_id, video_id):
if playlist_id:
if self._yes_playlist(playlist_id, video_id):
return self.url_result(
'http://www.dailymotion.com/playlist/' + video_id,
'DailymotionPlaylist', video_id)
'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', playlist_id)
password = self.get_param('videopassword')
media = self._call_api(