From 78338f71ca2d96e4bf507c438fbb2751742989b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 26 Jun 2014 16:34:36 +0200 Subject: [PATCH] [livestream:original] Add support for folder urls (closes #2631) The webpage only contains shortened links for the videos; since the server doesn't support HEAD requests, we use a specific extractor for them. --- test/test_playlists.py | 9 ++++++ youtube_dl/extractor/__init__.py | 6 +++- youtube_dl/extractor/common.py | 3 ++ youtube_dl/extractor/livestream.py | 52 +++++++++++++++++++++++++++--- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 42051fe2ab..71dac1b020 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -30,6 +30,7 @@ from youtube_dl.extractor import ( SoundcloudPlaylistIE, TeacherTubeClassroomIE, LivestreamIE, + LivestreamOriginalIE, NHLVideocenterIE, BambuserChannelIE, BandcampAlbumIE, @@ -155,6 +156,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertTrue(len(result['entries']) >= 4) + def test_livestreamoriginal_folder(self): + dl = FakeYDL() + ie = LivestreamOriginalIE(dl) + result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') + self.assertTrue(len(result['entries']) >= 28) + def test_nhl_videocenter(self): dl = FakeYDL() ie = NHLVideocenterIE(dl) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 02143de9ec..a1cdcf0f78 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -147,7 +147,11 @@ from .ku6 import Ku6IE from .la7 import LA7IE from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE -from .livestream import LivestreamIE, LivestreamOriginalIE +from .livestream import ( + 
LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) from .lynda import ( LyndaIE, LyndaCourseIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 49e75405e8..e4e4feef9e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -459,6 +459,9 @@ class InfoExtractor(object): if secure: regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) + def _og_search_url(self, html, **kargs): + return self._og_search_property('url', html, **kargs) + def _html_search_meta(self, name, html, display_name=None, fatal=False): if display_name is None: display_name = name diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 5c71f4f091..2c100d4246 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -9,6 +9,7 @@ from ..utils import ( compat_urlparse, xpath_with_ns, compat_str, + orderedSet, ) @@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor): # The original version of Livestream uses a different system class LivestreamOriginalIE(InfoExtractor): IE_NAME = 'livestream:original' - _VALID_URL = r'https?://www\.livestream\.com/(?P[^/]+)/video\?.*?clipId=(?P.*?)(&|$)' + _VALID_URL = r'''(?x)https?://www\.livestream\.com/ + (?P[^/]+)/(?Pvideo|folder) + (?:\?.*?Id=|/)(?P.*?)(&|$) + ''' _TEST = { 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', 'info_dict': { @@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor): }, } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - user = mobj.group('user') + def _extract_video(self, user, video_id): api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) info = self._download_xml(api_url, video_id) @@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor): 'ext': 'flv', 
'thumbnail': thumbnail_url, } + + def _extract_folder(self, url, folder_id): + webpage = self._download_webpage(url, folder_id) + urls = orderedSet(re.findall(r'.+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + id = mobj.group('id') + webpage = self._download_webpage(url, id) + + return { + '_type': 'url', + 'url': self._og_search_url(webpage), + }