Merge pull request #2041 from dstftw/imdb-list

[imdb] Add support for IMDb list (#2033)
This commit is contained in:
Jaime Marquínez Ferrándiz 2014-01-01 12:42:59 +01:00
commit 4fb757d1e0
3 changed files with 44 additions and 2 deletions

View File

@ -28,7 +28,8 @@ from youtube_dl.extractor import (
BandcampAlbumIE,
SmotriCommunityIE,
SmotriUserIE,
IviCompilationIE
IviCompilationIE,
ImdbListIE,
)
@ -188,6 +189,15 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20)
def test_imdb_list(self):
    """Extract a known IMDb list URL and check the resulting playlist."""
    extractor = ImdbListIE(FakeYDL())
    playlist = extractor.extract('http://www.imdb.com/list/sMjedvGDd8U')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'sMjedvGDd8U')
    self.assertEqual(playlist['title'], u'Animated and Family Films')
    # Only a lower bound on the number of entries is enforced.
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 48)
# Run the unittest runner when this module is executed directly as a script.
if __name__ == '__main__':
unittest.main()

View File

@ -80,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .hypem import HypemIE
from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE
from .imdb import (
ImdbIE,
ImdbListIE
)
from .ina import InaIE
from .infoq import InfoQIE
from .instagram import InstagramIE

View File

@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
'description': descr,
'thumbnail': format_info['slate'],
}
class ImdbListIE(InfoExtractor):
    """Extractor that turns an IMDb list page into a playlist of its videos."""

    IE_NAME = u'imdb:list'
    IE_DESC = u'Internet Movie Database lists'
    # Accepts http and https list URLs; the list id is 11 characters drawn
    # from digits, ASCII letters, '_' and '-'.
    _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'

    def _real_extract(self, url):
        """Build a playlist result for the IMDb list addressed by *url*.

        The list title is read from the list's RSS feed and the items from
        the list's CSV export. Only items whose id (second CSV column)
        starts with 'vi' are turned into playlist entries.

        Returns the value of ``self.playlist_result(entries, list_id,
        list_title)``.
        """
        mobj = re.match(self._VALID_URL, url)
        list_id = mobj.group('id')

        # RSS XML is sometimes malformed, so the title is taken with a plain
        # regex search on the downloaded text.
        rss = self._download_webpage(
            'http://rss.imdb.com/list/%s' % list_id, list_id,
            u'Downloading list RSS')
        list_title = self._html_search_regex(
            r'<title>(.*?)</title>', rss, u'list title')

        # Export is independent of actual author_id, but returns 404 if no
        # author_id is provided. However, passing dummy author_id seems to be
        # enough.
        list_csv = self._download_webpage(
            'http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id,
            list_id, u'Downloading list CSV')

        entries = []
        # The first line is skipped (presumably the CSV header row).
        for row in list_csv.split('\n')[1:]:
            cols = row.split(',')
            if len(cols) < 2:
                # Blank or truncated line: there is no id column to read.
                continue
            # Drop surrounding whitespace and the enclosing double quotes
            # without assuming the value is always quoted.
            item_id = cols[1].strip().strip('"')
            if item_id.startswith('vi'):
                entries.append(self.url_result(
                    'http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb'))
        return self.playlist_result(entries, list_id, list_title)