Use multiple regexes to match URLs

I can't live without regex101
This commit is contained in:
Mozi 2024-08-31 05:06:20 +00:00
parent c61db5e643
commit 84b90ec378
1 changed file with 21 additions and 11 deletions

View File

@ -99,19 +99,26 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'''(?ix) _VALID_URL_PREFIX = r'''(?ix)
https?:// https?://
(?: (?:
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}| (?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
(?:www\.)?lequipe\.fr (?:www\.)?lequipe\.fr
)/ )/
'''
_VALID_URL = [
rf'''{_VALID_URL_PREFIX}
(?: (?:
video/| video/|
swf(?:/(?!video)|/video/)| swf(?:/(?!video)|/video/)
player(?:/\w+)?\.html\?(?:video|playlist)= )(?P<id>[^/?_&#]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
) ''',
(?P<id>[^/?_&#]+) rf'''{_VALID_URL_PREFIX}
''' (?:
player(?:/\w+)?\.html\?
)(?:video[=/](?P<id>[^/?_&#]+))?(?:.*?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
''',
]
IE_NAME = 'dailymotion' IE_NAME = 'dailymotion'
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1'] _EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
_TESTS = [{ _TESTS = [{
@ -225,6 +232,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, { }, {
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video', 'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi # https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
@ -285,13 +295,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url) url, smuggled_data = unsmuggle_url(url)
video_id = self._match_id(url) video_id, playlist_id = self._match_valid_url(url).groups()
if 'playlist=' in url: if playlist_id:
if self._yes_playlist(video_id, video_id): if self._yes_playlist(playlist_id, video_id):
return self.url_result( return self.url_result(
'http://www.dailymotion.com/playlist/' + video_id, 'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', video_id) 'DailymotionPlaylist', playlist_id)
password = self.get_param('videopassword') password = self.get_param('videopassword')
media = self._call_api( media = self._call_api(