2014-08-18 15:39:35 +02:00
from . common import InfoExtractor
2015-02-16 15:45:01 +01:00
from . . utils import (
int_or_none ,
parse_duration ,
)
2014-08-18 15:39:35 +02:00
2015-02-16 15:45:01 +01:00
class RtlNlIE(InfoExtractor):
    """Extractor for videos on rtl.nl / rtlxl.nl and their embed players.

    The video UUID is taken from the URL and used to query the RTL metadata
    endpoint, from which the HLS manifest, title, description, duration and
    thumbnails are derived.
    """

    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    # Pattern for <iframe> embeds of the rtl.nl player; the "url" group holds the embed URL.
    _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
    # Verbose regex: literal "#" characters are escaped so that they are not
    # parsed as the start of a regex comment.
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
            embed\.rtl\.nl/\#uuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        }
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }, {
        # new embed URL schema
        'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video whose UUID is embedded in *url*.

        Raises KeyError/IndexError if the metadata response lacks the
        mandatory ``material``, ``abstracts`` or ``videopath`` entries.
        """
        uuid = self._match_id(url)
        info = self._download_json(
            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid,
            uuid)

        material = info['material'][0]
        # The title is the programme name, optionally followed by the
        # title of this particular item.
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += ' - %s' % subtitle
        description = material.get('synopsis')

        meta = info.get('meta', {})

        videopath = material['videopath']
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        thumbnails = []

        # Each base URL carries an "/sz=<width>x<height>" component and is
        # completed by appending the video UUID.  The keys must be the bare
        # meta names: a key containing literal quote characters never matches.
        for p in ('poster_base_url', 'thumb_base_url'):
            if not meta.get(p):
                continue

            thumbnails.append({
                'url': self._proto_relative_url(meta[p] + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)',
                    meta[p], 'thumbnail height', fatal=False))
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            # Optional field: a missing date must not abort the extraction.
            'timestamp': material.get('original_date'),
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }
2022-07-14 08:24:27 +02:00
class RTLLuBaseIE(InfoExtractor):
    """Shared extraction logic for the rtl.lu extractors.

    Subclasses only provide ``_VALID_URL`` (with an ``id`` group) and tests;
    the media URLs are scraped from the ``<rtl-player>`` and
    ``<rtl-audioplayer>`` elements of the downloaded page.
    """

    # Regexes capturing the quoted attribute value that holds each media URL.
    _MEDIA_REGEX = {
        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
    }

    @staticmethod
    def _is_live_id(video_id):
        """Return True if *video_id* has the form ``live``, ``live-<digits>`` or ``lauschteren``."""
        if video_id in ('live', 'lauschteren'):
            return True
        prefix, sep, number = video_id.partition('-')
        return prefix == 'live' and bool(sep) and number.isdecimal()

    def get_media_url(self, webpage, video_id, media_type):
        """Return the URL found for *media_type* in *webpage*, or None.

        *media_type* must be a key of ``_MEDIA_REGEX``; *video_id* is accepted
        for interface compatibility but is not used.
        """
        return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)

    def get_formats_and_subtitles(self, webpage, video_id):
        """Return ``(formats, subtitles)`` collected from *webpage*.

        HLS formats and subtitles come from the video player, if present; an
        audio player, if present, contributes one additional audio-only format.
        """
        video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')

        formats, subtitles = [], {}
        if video_url is not None:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
        if audio_url is not None:
            formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})

        return formats, subtitles

    def _real_extract(self, url):
        """Download the page at *url* and build its info dict."""
        video_id = self._match_id(url)
        # Numbered live channels are matched by pattern rather than by a
        # fixed list, so ids such as "live-3" are also flagged as live.
        is_live = self._is_live_id(video_id)

        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
        # we can get the context from <rtl-comments context=<context> in the webpage
        webpage = self._download_webpage(url, video_id)

        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'formats': formats,
            'subtitles': subtitles,
            # Prefer the player's poster; fall back to the OpenGraph thumbnail.
            'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'is_live': is_live,
        }
class RTLLuTeleVODIE(RTLLuBaseIE):
    """rtl.lu on-demand TV videos (``/tele/<slug>/v/<id>`` and ``/video/<id>`` URLs)."""

    IE_NAME = 'rtl.lu:tele-vod'
    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
            'info_dict': {
                'id': '3266757',
                'title': 'Informatiounsversammlung Héichwaasser',
                'ext': 'mp4',
                'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
                'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
            },
        },
        {
            'url': 'https://www.rtl.lu/video/3295215',
            'info_dict': {
                'id': '3295215',
                'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
                'ext': 'mp4',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
                'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
            },
        },
    ]
class RTLLuArticleIE(RTLLuBaseIE):
    """rtl.lu articles (``/a/<id>.html``) on the www, 5minutes and today subdomains."""

    IE_NAME = 'rtl.lu:article'
    _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
    _TESTS = [
        {
            # Audio-only
            'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
            'info_dict': {
                'id': '1934360',
                'ext': 'mp3',
                'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
                'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
                'title': 'md5:40aa85f135578fbd549d3c9370321f99',
            },
        },
        {
            # 5minutes
            'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
            'info_dict': {
                'id': '1853173',
                'ext': 'mp4',
                'description': 'md5:ac031da0740e997a5cf4633173634fee',
                'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
            },
        },
        {
            # today.lu
            'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
            'info_dict': {
                'id': '1936203',
                'ext': 'mp4',
                'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
                'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
                'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
            },
        },
    ]
class RTLLuLiveIE(RTLLuBaseIE):
    """rtl.lu live pages under ``/tele/`` and ``/radio/`` (``live``, ``live-<n>``, ``lauschteren``)."""

    _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
    _TESTS = [
        {
            # Tele:live
            'url': 'https://www.rtl.lu/tele/live',
            'info_dict': {
                'id': 'live',
                'ext': 'mp4',
                'live_status': 'is_live',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
            },
        },
        {
            # Tele:live-2
            'url': 'https://www.rtl.lu/tele/live-2',
            'info_dict': {
                'id': 'live-2',
                'ext': 'mp4',
                'live_status': 'is_live',
                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
            },
        },
        {
            # Radio:lauschteren
            'url': 'https://www.rtl.lu/radio/lauschteren',
            'info_dict': {
                'id': 'lauschteren',
                'ext': 'mp4',
                'live_status': 'is_live',
                'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
                'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
            },
        },
    ]
class RTLLuRadioIE(RTLLuBaseIE):
    """rtl.lu radio show pages (``/radio/<slug>/s/<id>`` URLs)."""

    _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
    _TESTS = [
        {
            'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
            'info_dict': {
                'id': '4033058',
                'ext': 'mp3',
                'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
                'title': '5 vir 12 - Stau um Stau',
                'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
            },
        },
    ]