[europa] Add new extractor

2015-08-12 16:59:04 +02:00 · 2015-08-12 16:59:04 +02:00 · 3bb3f04108
parent 59a9efe85b
commit 3bb3f04108
2 changed files with 61 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -158,6 +158,7 @@ from .eroprofile import EroProfileIE
 from .escapist import EscapistIE
 from .espn import ESPNIE
 from .esri import EsriVideoIE
 from .europa import EuropaIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .expotv import ExpoTVIE
--- a/youtube_dl/extractor/europa.py
+++ b/youtube_dl/extractor/europa.py
@@ -0,0 +1,60 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    compat_urlparse,
    xpath_text
 )
 class EuropaIE(InfoExtractor):
    """Extractor for videos served by the ec.europa.eu "avservices" player.

    The video reference is taken from the ``ref`` query parameter of the
    player URL.  An optional ``sitelang`` query parameter selects the
    preferred language for the title, the description and the formats;
    it defaults to ``en``.
    """

    _VALID_URL = r'https?://ec\.europa\.eu/avservices/video/player\.cfm\?(?:[^&]|&(?!ref))*ref=(?P<id>[A-Za-z0-9]+)'
    _TEST = {
        'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
        'md5': '728cca2fd41d5aa7350cec1141fbe620',
        'info_dict': {
            'id': 'I107758',
            'ext': 'mp4',
            'title': 'TRADE - Wikileaks on TTIP',
            'description': 'NEW  LIVE EC Midday press briefing of 11/08/2015',
            # Raw string: the pattern contains regex escapes (``\.``) that
            # are invalid escape sequences in a normal string literal.
            'thumbnail': r're:^http://defiris\.ec\.streamcloud\.be/findmedia/18/107758/THUMB_[0-9A-Z]+\.jpg$'
        }
    }

    def _real_extract(self, url):
        """Download the playlist XML for ``url`` and build the info dict.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnail`` and ``formats`` (one entry per ``files/file``
        element that carries a URL).
        """
        video_id = self._match_id(url)

        # Preferred language comes from the optional ``sitelang`` parameter.
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        lang = query.get('sitelang', ['en'])[0]

        playlist = self._download_xml(
            'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=' + video_id,
            video_id)

        def labels_by_language(xpath):
            # Map each item's <lg> code to its stripped <label> text,
            # skipping items that have no label instead of crashing.
            labels = {}
            for item in playlist.findall(xpath):
                label = xpath_text(item, 'label')
                if label is not None:
                    labels[xpath_text(item, 'lg')] = label.strip()
            return labels

        titles = labels_by_language('info/title/item')
        descriptions = labels_by_language('info/description/item')

        # Build an independent dict per file.  Reusing the per-language
        # metadata dict here would make several files of the same language
        # share (and overwrite) a single format entry.
        formats = []
        for item in playlist.findall('files/file'):
            video_url = xpath_text(item, 'url')
            if not video_url:
                continue
            fmt = {
                'url': video_url,
                'format_note': xpath_text(item, 'lglabel'),
            }
            if xpath_text(item, 'lg') == lang:
                fmt['language_preference'] = 10
            formats.append(fmt)
        # Keep the original ordering convention: playlist order reversed.
        formats.reverse()

        # Requested language first, then the 'int' entry; both lookups are
        # lazy so a missing 'int' entry no longer breaks an existing match.
        title = titles.get(lang) or titles.get('int')
        if not title:
            # Last resort: the first title label of any language.  With
            # fatal=True, xpath_text is expected to raise a descriptive
            # extractor error when no title exists at all.
            title = xpath_text(
                playlist, 'info/title/item/label', 'title', fatal=True).strip()

        # The description is optional; None when nothing matches.
        description = descriptions.get(lang) or descriptions.get('int')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': xpath_text(playlist, 'info/thumburl', 'thumburl'),
            'formats': formats
        }