From 0a8a7e68fabf6fc9387f270301e51225ac349b00 Mon Sep 17 00:00:00 2001
From: Teemu Ikonen
Date: Tue, 5 Apr 2022 15:15:47 +0300
Subject: [PATCH] [ruutu] Detect embeds (#3294)

Authored by: tpikonen
---
 yt_dlp/extractor/generic.py | 26 +++++++++++++++++++++++++-
 yt_dlp/extractor/ruutu.py   | 15 +++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 65e803dd70..2c503e5817 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -149,6 +149,7 @@ from .blogger import BloggerIE
 from .mainstreaming import MainStreamingIE
 from .gfycat import GfycatIE
 from .panopto import PanoptoBaseIE
+from .ruutu import RuutuIE
 
 
 class GenericIE(InfoExtractor):
@@ -2511,7 +2512,24 @@ class GenericIE(InfoExtractor):
                 'id': 'insert-a-quiz-into-a-panopto-video'
             },
             'playlist_count': 1
-        }
+        },
+        {
+            # Ruutu embed
+            'url': 'https://www.nelonen.fi/ohjelmat/madventures-suomi/2160731-riku-ja-tunna-lahtevat-peurajahtiin-tv-sta-tutun-biologin-kanssa---metsastysreissu-huipentuu-kasvissyojan-painajaiseen',
+            'md5': 'a2513a98d3496099e6eced40f7e6a14b',
+            'info_dict': {
+                'id': '4044426',
+                'ext': 'mp4',
+                'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!',
+                'thumbnail': r're:^https?://.+\.jpg$',
+                'duration': 108,
+                'series' : 'Madventures Suomi',
+                'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381',
+                'categories': ['Matkailu', 'Elämäntyyli'],
+                'age_limit': 0,
+                'upload_date': '20220308',
+            },
+        },
     ]
 
     def report_following_redirect(self, new_url):
@@ -3737,6 +3755,12 @@ class GenericIE(InfoExtractor):
         panopto_urls = PanoptoBaseIE._extract_urls(webpage)
         if panopto_urls:
             return self.playlist_from_matches(panopto_urls, video_id, video_title)
+
+        # Look for Ruutu embeds
+        ruutu_url = RuutuIE._extract_url(webpage)
+        if ruutu_url:
+            return self.url_result(ruutu_url, RuutuIE)
+
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
index d9cf39d712..5a30e33606 100644
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@@ -1,6 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import json
+import re
+
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
@@ -8,6 +11,8 @@ from ..utils import (
     ExtractorError,
     find_xpath_attr,
     int_or_none,
+    traverse_obj,
+    try_call,
     unified_strdate,
     url_or_none,
     xpath_attr,
@@ -123,6 +128,16 @@ class RuutuIE(InfoExtractor):
     ]
     _API_BASE = 'https://gatling.nelonenmedia.fi'
 
+    @classmethod
+    def _extract_url(cls, webpage):
+        settings = try_call(
+            lambda: json.loads(re.search(
+                r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False))
+        video_id = traverse_obj(settings, (
+            'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value'))
+        if video_id:
+            return f'http://www.ruutu.fi/video/{video_id}'
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 