From b3febedbeb662dfdf9b5c1d5799039ad4fc969de Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Thu, 21 Sep 2023 11:30:32 -0600 Subject: [PATCH] [ie/Canal1,CaracolTvPlay] Add extractors (#7151) Closes #5826 Authored by: elyse0 --- yt_dlp/extractor/_extractors.py | 2 + yt_dlp/extractor/canal1.py | 39 +++++++++ yt_dlp/extractor/caracoltv.py | 136 ++++++++++++++++++++++++++++++++ yt_dlp/extractor/mediastream.py | 8 +- 4 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 yt_dlp/extractor/canal1.py create mode 100644 yt_dlp/extractor/caracoltv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3ce6baef2..632d6720e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -296,9 +296,11 @@ from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .camwithher import CamWithHerIE +from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .caracoltv import CaracolTvPlayIE from .carambatv import ( CarambaTVIE, CarambaTVPageIE, diff --git a/yt_dlp/extractor/canal1.py b/yt_dlp/extractor/canal1.py new file mode 100644 index 000000000..587a11ab8 --- /dev/null +++ b/yt_dlp/extractor/canal1.py @@ -0,0 +1,39 @@ +from .common import InfoExtractor + + +class Canal1IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P[\w-]+)' + + _TESTS = [{ + 'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/', + 'info_dict': { + 'id': '63b39f6b354977084b85ab54', + 'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco', + 'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó', + 'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013', + 'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54', + 'ext': 'mp4', + }, + }, { + 'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/', + 'info_dict': { + 'id': '63b39e93f5fd223aa32250fb', + 'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter', + 'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter', + 'description': 'md5:d9f691f131a21ce6767ca6c05d17d791', + 'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb', + 'ext': 'mp4', + }, + }, { + # Geo-restricted to Colombia + 'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + return self.url_result( + self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'), + display_id=display_id, url_transparent=True) diff --git a/yt_dlp/extractor/caracoltv.py b/yt_dlp/extractor/caracoltv.py new file mode 100644 index 000000000..79f7752fe --- /dev/null +++ b/yt_dlp/extractor/caracoltv.py @@ -0,0 +1,136 @@ +import base64 +import json +import uuid + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + traverse_obj, + urljoin, +) + + +class CaracolTvPlayIE(InfoExtractor): + _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P[^/?#]+)' + _NETRC_MACHINE = 'caracoltv-play' + + _TESTS = [{ + 'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==', + 'info_dict': { + 'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==', + 'title': 'La teoría del promedio', + 'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3', + }, + 'playlist_count': 6, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0', + 'info_dict': { + 'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==', + 'title': 'Ella', + 'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8', + }, + 'playlist_count': 10, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0', + 'info_dict': { + 'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==', + 'title': 'La vuelta al mundo en 80 risas 2022', + 'description': 'md5:e97aac36106e5c37ebf947b3350106a4', + }, + 'playlist_count': 17, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1', + 'only_matching': True, + }] + + _USER_TOKEN = None + + def _extract_app_token(self, webpage): + config_js_path = self._search_regex( + r']+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False) + + mediation_config = {} if not config_js_path else self._search_json( + r'mediation\s*:', self._download_webpage( + urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'), + 'mediation_config', None, transform_source=js_to_json, fatal=False) + + key = traverse_obj( + mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50' + secret = traverse_obj( + mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0' + + return base64.b64encode(f'{key}:{secret}'.encode()).decode() + + def _perform_login(self, email, password): + webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False) + app_token = self._extract_app_token(webpage) + + bearer_token = self._download_json( + 'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token', + headers={'Authorization': f'Basic {app_token}'})['token'] + + self._USER_TOKEN = self._download_json( + 'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={ + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {bearer_token}', + }, data=json.dumps({ + 'device_data': { + 'device_id': str(uuid.uuid4()), + 'device_token': '', + 'device_type': 'web' + }, + 'login_data': { + 'enabled': True, + 'email': email, + 'password': password, + } + }).encode())['user_token'] + + def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None): + formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4') + + return { + 'id': video_data['id'], + 'title': video_data.get('name'), + 'description': video_data.get('description'), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': traverse_obj( + video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})), + 'series_id': series_id, + 'season_id': season_id, + 'season_number': int_or_none(season_number), + 'episode_number': int_or_none(video_data.get('item_order')), + 'is_live': video_data.get('entry_type') == 3, + } + + def _extract_series_seasons(self, seasons, series_id): + for season in seasons: + api_response = self._download_json( + 'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']}, + headers={'Authorization': f'Bearer {self._USER_TOKEN}'}) + + season_number = season.get('order') + for episode in api_response['items']: + yield self._extract_video(episode, series_id, season['id'], season_number) + + def _real_extract(self, url): + series_id = self._match_id(url) + + if self._USER_TOKEN is None: + self._perform_login('guest@inmobly.com', 'Test@gus1') + + api_response = self._download_json( + 'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id}, + headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0] + + if not api_response.get('seasons'): + return self._extract_video(api_response) + + return self.playlist_result( + self._extract_series_seasons(api_response['seasons'], series_id), + series_id, **traverse_obj(api_response, { + 'title': 'name', + 'description': 'description', + })) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index d5c9aab8a..b8cb5a691 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -106,8 +106,12 @@ class MediaStreamIE(MediaStreamBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - if 'Debido a tu ubicación no puedes ver el contenido' in webpage: - self.raise_geo_restricted() + for message in [ + 'Debido a tu ubicación no puedes ver el contenido', + 'You are not allowed to watch this video: Geo Fencing Restriction' + ]: + if message in webpage: + self.raise_geo_restricted() player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)