From c0837a12c8a64c682a01e4bfdee6f22615568d69 Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Sat, 12 Mar 2016 18:00:26 +0100
Subject: [PATCH 1/8] [WDR] complete overhaul after relaunch of the site

The WDR relaunched their site on 2016-02-23, which changed not only the
URL scheme completely but also the layout of their pages.

Apparently the whole "mediathek" now runs on the wdr-domain, so no
separate URL for funkhauseuropa anymore.
There seems to be no explicit handling of video-sizes on the page or in
the URLs anymore. There seems to be only one size for HTML5, but still
several sizes for flash. The extractor adds all to the list of formats.

There is no metadata for the HTML5-stream, so the best flash-stream
will always be considered the "best" format. At least in my tests
this seemed to be true anyway.
---
 youtube_dl/extractor/wdr.py | 251 +++++++++++++++---------------------
 1 file changed, 101 insertions(+), 150 deletions(-)
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 31c9043032..f881b7300e 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-import itertools
 import re
 
 from .common import InfoExtractor
@@ -11,204 +10,156 @@ from ..compat import (
 )
 from ..utils import (
     unified_strdate,
-    qualities,
+    ExtractorError,
 )
 
 
 class WDRIE(InfoExtractor):
-    _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
-    _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX
+    _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
+    _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX
+
+    _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)'
 
     _TESTS = [
         {
-            'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
+            'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
+            'md5': 'e58c39c3e30077141d258bf588700a7b',
             'info_dict': {
-                'id': 'mdb-362427',
+                'id': 'mdb-1058683',
                 'ext': 'flv',
-                'title': 'Servicezeit',
-                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
-                'upload_date': '20140310',
-                'is_live': False
-            },
-            'params': {
-                'skip_download': True,
+                'display_id': 'doku-am-freitag/video-geheimnis-aachener-dom-100',
+                'title': 'Geheimnis Aachener Dom',
+                'alt_title': 'Doku am Freitag',
+                'upload_date': '20160304',
+                'description': 'md5:87be8ff14d8dfd7a7ee46f0299b52318',
+                'is_live': False,
+                'subtitles': {'de': [{
+                    'url': 'http://ondemand-ww.wdr.de/medp/fsk0/105/1058683/1058683_12220974.xml'
+                }]},
             },
             'skip': 'Page Not Found',
         },
         {
-            'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
+            'url': 'http://www1.wdr.de/mediathek/audio/wdr3/wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100.html',
+            'md5': 'f4c1f96d01cf285240f53ea4309663d8',
             'info_dict': {
-                'id': 'mdb-363194',
+                'id': 'mdb-1072000',
+                'ext': 'mp3',
+                'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100',
+                'title': 'Schriftstellerin Juli Zeh',
+                'alt_title': 'WDR 3 Gespräch am Samstag',
+                'upload_date': '20160312',
+                'description': 'md5:e127d320bc2b1f149be697ce044a3dd7',
+                'is_live': False,
+                'subtitles': {}
+            },
+            'skip': 'Page Not Found',
+        },
+        {
+            'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
+            'info_dict': {
+                'id': 'mdb-103364',
                 'ext': 'flv',
-                'title': 'Marga Spiegel ist tot',
-                'description': 'md5:2309992a6716c347891c045be50992e4',
-                'upload_date': '20140311',
-                'is_live': False
-            },
-            'params': {
-                'skip_download': True,
-            },
-            'skip': 'Page Not Found',
-        },
-        {
-            'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
-            'md5': '83e9e8fefad36f357278759870805898',
-            'info_dict': {
-                'id': 'mdb-194332',
-                'ext': 'mp3',
-                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
-                'description': 'md5:2309992a6716c347891c045be50992e4',
-                'upload_date': '20091129',
-                'is_live': False
-            },
-        },
-        {
-            'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
-            'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
-            'info_dict': {
-                'id': 'mdb-478135',
-                'ext': 'mp3',
-                'title': 'Flavia Coelho: Amar é Amar',
-                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
-                'upload_date': '20140717',
-                'is_live': False
-            },
-            'skip': 'Page Not Found',
-        },
-        {
-            'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
-            'playlist_mincount': 146,
-            'info_dict': {
-                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
+                'display_id': 'index',
+                'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'alt_title': 'WDR Fernsehen Live',
+                'upload_date': None,
+                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+                'is_live': True,
+                'subtitles': {}
             }
         },
         {
-            'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
+            'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
+            'playlist_mincount': 10,
             'info_dict': {
-                'id': 'mdb-103364',
-                'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
-                'ext': 'flv',
-                'upload_date': '20150101',
-                'is_live': True
-            },
-            'params': {
-                'skip_download': True,
+                'id': 'aktuelle-stunde/aktuelle-stunde-120',
             },
         }
     ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        page_url = mobj.group('url')
-        page_id = mobj.group('id')
+        url_type = mobj.group('type')
+        page_url = mobj.group('page_url')
+        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, display_id)
 
-        webpage = self._download_webpage(url, page_id)
+        js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None)
 
-        if mobj.group('player') is None:
+        if not js_url:
             entries = [
-                self.url_result(page_url + href, 'WDR')
+                self.url_result(page_url + href[0], 'WDR')
                 for href in re.findall(
-                    r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX,
+                    r'<a href="(%s)"' % self._PAGE_REGEX,
                     webpage)
             ]
 
             if entries:  # Playlist page
-                return self.playlist_result(entries, page_id)
+                return self.playlist_result(entries, playlist_id=display_id)
 
-            # Overview page
-            entries = []
-            for page_num in itertools.count(2):
-                hrefs = re.findall(
-                    r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
-                    webpage)
-                entries.extend(
-                    self.url_result(page_url + href, 'WDR')
-                    for href in hrefs)
-                next_url_m = re.search(
-                    r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
-                if not next_url_m:
-                    break
-                next_url = page_url + next_url_m.group(1)
-                webpage = self._download_webpage(
-                    next_url, page_id,
-                    note='Downloading playlist page %d' % page_num)
-            return self.playlist_result(entries, page_id)
+            raise ExtractorError('No downloadable streams found', expected=True)
 
-        flashvars = compat_parse_qs(self._html_search_regex(
-            r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))
+        js_data = self._download_webpage(js_url, 'metadata')
+        json_data = self._search_regex(r'\(({.*})\)', js_data, 'json')
+        metadata = self._parse_json(json_data, display_id)
 
-        page_id = flashvars['trackerClipId'][0]
-        video_url = flashvars['dslSrc'][0]
-        title = flashvars['trackerClipTitle'][0]
-        thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
-        is_live = flashvars.get('isLive', ['0'])[0] == '1'
+        metadata_tracker_data = metadata["trackerData"]
+        metadata_media_resource = metadata["mediaResource"]
+
+        formats = []
+
+        # check if the metadata contains a direct URL to a file
+        metadata_media_alt = metadata_media_resource.get("alt")
+        if metadata_media_alt:
+            for tag_name in ["videoURL", 'audioURL']:
+                if tag_name in metadata_media_alt:
+                    formats.append({
+                        'url': metadata_media_alt[tag_name]
+                    })
+
+        # check if there are flash-streams for this video
+        if "dflt" in metadata_media_resource and "videoURL" in metadata_media_resource["dflt"]:
+            video_url = metadata_media_resource["dflt"]["videoURL"]
+            if video_url.endswith('.f4m'):
+                full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
+                formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False))
+            elif video_url.endswith('.smil'):
+                formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False))
+
+        subtitles = {}
+        caption_url = metadata_media_resource.get("captionURL")
+        if caption_url:
+            subtitles['de'] = [{
+                'url': caption_url
+            }]
+
+        title = metadata_tracker_data.get("trackerClipTitle")
+        is_live = url_type == 'live'
 
         if is_live:
             title = self._live_title(title)
-
-        if 'trackerClipAirTime' in flashvars:
-            upload_date = flashvars['trackerClipAirTime'][0]
+            upload_date = None
+        elif 'trackerClipAirTime' in metadata_tracker_data:
+            upload_date = metadata_tracker_data['trackerClipAirTime']
         else:
-            upload_date = self._html_search_meta(
-                'DC.Date', webpage, 'upload date')
+            upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')
 
         if upload_date:
             upload_date = unified_strdate(upload_date)
 
-        formats = []
-        preference = qualities(['S', 'M', 'L', 'XL'])
-
-        if video_url.endswith('.f4m'):
-            formats.extend(self._extract_f4m_formats(
-                video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', page_id,
-                f4m_id='hds', fatal=False))
-        elif video_url.endswith('.smil'):
-            formats.extend(self._extract_smil_formats(
-                video_url, page_id, False, {
-                    'hdcore': '3.3.0',
-                    'plugin': 'aasp-3.3.0.99.43',
-                }))
-        else:
-            formats.append({
-                'url': video_url,
-                'http_headers': {
-                    'User-Agent': 'mobile',
-                },
-            })
-
-        m3u8_url = self._search_regex(
-            r'rel="adaptiv"[^>]+href="([^"]+)"',
-            webpage, 'm3u8 url', default=None)
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, page_id, 'mp4', 'm3u8_native',
-                m3u8_id='hls', fatal=False))
-
-        direct_urls = re.findall(
-            r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage)
-        if direct_urls:
-            for quality, video_url in direct_urls:
-                formats.append({
-                    'url': video_url,
-                    'preference': preference(quality),
-                    'http_headers': {
-                        'User-Agent': 'mobile',
-                    },
-                })
-
         self._sort_formats(formats)
 
-        description = self._html_search_meta('Description', webpage, 'description')
-
         return {
-            'id': page_id,
-            'formats': formats,
+            'id': metadata_tracker_data.get("trackerClipId", display_id),
+            'display_id': display_id,
             'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
+            'alt_title': metadata_tracker_data.get("trackerClipSubcategory"),
+            'formats': formats,
             'upload_date': upload_date,
-            'is_live': is_live
+            'description': self._html_search_meta("Description", webpage),
+            'is_live': is_live,
+            'subtitles': subtitles,
         }
 
 

From 14f7a2b8af17d1f490c46a0a9028ba9d97cf7df2 Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Sat, 12 Mar 2016 20:14:46 +0100
Subject: [PATCH 2/8] [WDRMaus] switch current show to new WDR extractor (fixes
 #8562)

It seems that the "current show" already uses the new WDR video-player,
while all the other videos still use the old player.

I just added the current show URL to the normal WDR-extractor, which
works fine. This commit needs my changes from PR #8842 that fix the
support for WDR.
---
 youtube_dl/extractor/wdr.py | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index f881b7300e..ec81f1a28a 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -15,8 +15,9 @@ from ..utils import (
 
 
 class WDRIE(InfoExtractor):
+    _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5'
     _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
-    _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX
+    _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + "|" + _CURRENT_MAUS_URL
 
     _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)'
 
@@ -75,7 +76,18 @@ class WDRIE(InfoExtractor):
             'info_dict': {
                 'id': 'aktuelle-stunde/aktuelle-stunde-120',
             },
-        }
+        },
+        {
+            'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
+            'info_dict': {
+                'id': 'mdb-1096487',
+                'ext': 'flv',
+                'upload_date': 're:^[0-9]{8}$',
+                'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
+                'description': '- Die Sendung mit der Maus -',
+            },
+            'skip': 'The id changes from week to week because of the new episode'
+        },
     ]
 
     def _real_extract(self, url):
@@ -195,26 +207,17 @@ class WDRMobileIE(InfoExtractor):
 
 
 class WDRMausIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
+    _VALID_URL = 'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)((?<!index)\.php5|/(?:$|[?#]))'
     IE_DESC = 'Sendung mit der Maus'
     _TESTS = [{
-        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
+        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5',
+        'md5': '178b432d002162a14ccb3e0876741095',
         'info_dict': {
-            'id': 'aktuelle-sendung',
+            'id': 'achterbahn',
             'ext': 'mp4',
             'thumbnail': 're:^http://.+\.jpg',
-            'upload_date': 're:^[0-9]{8}$',
-            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
-        }
-    }, {
-        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
-        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
-        'info_dict': {
-            'id': '40_jahre_maus',
-            'ext': 'mp4',
-            'thumbnail': 're:^http://.+\.jpg',
-            'upload_date': '20131007',
-            'title': '12.03.2011 - 40 Jahre Maus',
+            'upload_date': '20131001',
+            'title': '19.09.2013 - Achterbahn',
         }
     }]
 

From 3874e6ea66c738910c6a1065b2d781e04a8143ae Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Thu, 26 May 2016 16:45:14 +0200
Subject: [PATCH 3/8] [WDR] use single quotes for strings

---
 youtube_dl/extractor/wdr.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index ec81f1a28a..05bfe7deb3 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -17,7 +17,7 @@ from ..utils import (
 class WDRIE(InfoExtractor):
     _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5'
     _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
-    _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + "|" + _CURRENT_MAUS_URL
+    _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
 
     _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)'
 
@@ -116,23 +116,23 @@ class WDRIE(InfoExtractor):
         json_data = self._search_regex(r'\(({.*})\)', js_data, 'json')
         metadata = self._parse_json(json_data, display_id)
 
-        metadata_tracker_data = metadata["trackerData"]
-        metadata_media_resource = metadata["mediaResource"]
+        metadata_tracker_data = metadata['trackerData']
+        metadata_media_resource = metadata['mediaResource']
 
         formats = []
 
         # check if the metadata contains a direct URL to a file
-        metadata_media_alt = metadata_media_resource.get("alt")
+        metadata_media_alt = metadata_media_resource.get('alt')
         if metadata_media_alt:
-            for tag_name in ["videoURL", 'audioURL']:
+            for tag_name in ['videoURL', 'audioURL']:
                 if tag_name in metadata_media_alt:
                     formats.append({
                         'url': metadata_media_alt[tag_name]
                     })
 
         # check if there are flash-streams for this video
-        if "dflt" in metadata_media_resource and "videoURL" in metadata_media_resource["dflt"]:
-            video_url = metadata_media_resource["dflt"]["videoURL"]
+        if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']:
+            video_url = metadata_media_resource['dflt']['videoURL']
             if video_url.endswith('.f4m'):
                 full_video_url = video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
                 formats.extend(self._extract_f4m_formats(full_video_url, display_id, f4m_id='hds', fatal=False))
@@ -140,13 +140,13 @@ class WDRIE(InfoExtractor):
                 formats.extend(self._extract_smil_formats(video_url, 'stream', fatal=False))
 
         subtitles = {}
-        caption_url = metadata_media_resource.get("captionURL")
+        caption_url = metadata_media_resource.get('captionURL')
         if caption_url:
             subtitles['de'] = [{
                 'url': caption_url
             }]
 
-        title = metadata_tracker_data.get("trackerClipTitle")
+        title = metadata_tracker_data.get('trackerClipTitle')
         is_live = url_type == 'live'
 
         if is_live:
@@ -163,13 +163,13 @@ class WDRIE(InfoExtractor):
         self._sort_formats(formats)
 
         return {
-            'id': metadata_tracker_data.get("trackerClipId", display_id),
+            'id': metadata_tracker_data.get('trackerClipId', display_id),
             'display_id': display_id,
             'title': title,
-            'alt_title': metadata_tracker_data.get("trackerClipSubcategory"),
+            'alt_title': metadata_tracker_data.get('trackerClipSubcategory'),
             'formats': formats,
             'upload_date': upload_date,
-            'description': self._html_search_meta("Description", webpage),
+            'description': self._html_search_meta('Description', webpage),
             'is_live': is_live,
             'subtitles': subtitles,
         }

From 37f972954da0d0f1f0c5e97da8357c4baf687ee6 Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Thu, 26 May 2016 16:59:45 +0200
Subject: [PATCH 4/8] [WDR] use _download_json with a strip_jsonp

---
 youtube_dl/extractor/wdr.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 05bfe7deb3..73a343c69b 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -9,6 +9,7 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
+    strip_jsonp,
     unified_strdate,
     ExtractorError,
 )
@@ -112,9 +113,8 @@ class WDRIE(InfoExtractor):
 
             raise ExtractorError('No downloadable streams found', expected=True)
 
-        js_data = self._download_webpage(js_url, 'metadata')
-        json_data = self._search_regex(r'\(({.*})\)', js_data, 'json')
-        metadata = self._parse_json(json_data, display_id)
+        metadata = self._download_json(
+            js_url, 'metadata', transform_source=strip_jsonp)
 
         metadata_tracker_data = metadata['trackerData']
         metadata_media_resource = metadata['mediaResource']

From bec2c14f2cf4f06f1b99e04d59779d8d103d726a Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Thu, 26 May 2016 17:30:38 +0200
Subject: [PATCH 5/8] [WDR] add special handling if alt-url is a m3u8

---
 youtube_dl/extractor/wdr.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 73a343c69b..fddcbf1907 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -9,6 +9,7 @@ from ..compat import (
     compat_urlparse,
 )
 from ..utils import (
+    determine_ext,
     strip_jsonp,
     unified_strdate,
     ExtractorError,
@@ -61,7 +62,7 @@ class WDRIE(InfoExtractor):
             'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
             'info_dict': {
                 'id': 'mdb-103364',
-                'ext': 'flv',
+                'ext': 'mp4',
                 'display_id': 'index',
                 'title': r're:^WDR Fernsehen im Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                 'alt_title': 'WDR Fernsehen Live',
@@ -69,7 +70,10 @@ class WDRIE(InfoExtractor):
                 'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
                 'is_live': True,
                 'subtitles': {}
-            }
+            },
+            'params': {
+                'skip_download': True,  # m3u8 download
+            },
         },
         {
             'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
@@ -126,9 +130,16 @@ class WDRIE(InfoExtractor):
         if metadata_media_alt:
             for tag_name in ['videoURL', 'audioURL']:
                 if tag_name in metadata_media_alt:
-                    formats.append({
-                        'url': metadata_media_alt[tag_name]
-                    })
+                    alt_url = metadata_media_alt[tag_name]
+                    if determine_ext(alt_url) == 'm3u8':
+                        m3u_fmt = self._extract_m3u8_formats(
+                            alt_url, display_id, 'mp4', 'm3u8_native',
+                            m3u8_id='hls')
+                        formats.extend(m3u_fmt)
+                    else:
+                        formats.append({
+                            'url': alt_url
+                        })
 
         # check if there are flash-streams for this video
         if 'dflt' in metadata_media_resource and 'videoURL' in metadata_media_resource['dflt']:

From 33a1ff7113d9dd656b3c56cb404de85646caa559 Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Thu, 26 May 2016 19:08:12 +0200
Subject: [PATCH 6/8] [WDR] extract jsonp-url by parsing data-extension of
 mediaLink

---
 youtube_dl/extractor/wdr.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index fddcbf1907..dd107ef8af 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -10,6 +10,7 @@ from ..compat import (
 )
 from ..utils import (
     determine_ext,
+    js_to_json,
     strip_jsonp,
     unified_strdate,
     ExtractorError,
@@ -21,8 +22,6 @@ class WDRIE(InfoExtractor):
     _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
     _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
 
-    _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)'
-
     _TESTS = [
         {
             'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html',
@@ -102,9 +101,13 @@ class WDRIE(InfoExtractor):
         display_id = mobj.group('display_id')
         webpage = self._download_webpage(url, display_id)
 
-        js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None)
+        # for wdr.de the data-extension is in a tag with the class "mediaLink"
+        # for wdrmaus it's in a link to the page in a multiline "videoLink"-tag
+        json_metadata = self._html_search_regex(
+            r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
+            webpage, 'media link', default=None, flags=re.MULTILINE)
 
-        if not js_url:
+        if not json_metadata:
             entries = [
                 self.url_result(page_url + href[0], 'WDR')
                 for href in re.findall(
@@ -117,8 +120,12 @@ class WDRIE(InfoExtractor):
 
             raise ExtractorError('No downloadable streams found', expected=True)
 
+        media_link_obj = self._parse_json(json_metadata, display_id,
+                                          transform_source=js_to_json)
+        jsonp_url = media_link_obj['mediaObj']['url']
+
         metadata = self._download_json(
-            js_url, 'metadata', transform_source=strip_jsonp)
+            jsonp_url, 'metadata', transform_source=strip_jsonp)
 
         metadata_tracker_data = metadata['trackerData']
         metadata_media_resource = metadata['mediaResource']

From 949fc42e009aed5414caad280d0dc551ffcd9c14 Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Thu, 26 May 2016 19:58:55 +0200
Subject: [PATCH 7/8] [WDR] the other wdrmaus.de pages also changed to the new
 player

---
 youtube_dl/extractor/extractors.py |  1 -
 youtube_dl/extractor/wdr.py        | 89 +++++-------------------------
 2 files changed, 15 insertions(+), 75 deletions(-)

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 6de3438fc8..023598130e 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -923,7 +923,6 @@ from .wat import WatIE
 from .wdr import (
     WDRIE,
     WDRMobileIE,
-    WDRMausIE,
 )
 from .webofstories import (
     WebOfStoriesIE,
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index dd107ef8af..1af1e996d8 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -4,10 +4,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_urlparse,
-)
 from ..utils import (
     determine_ext,
     js_to_json,
@@ -18,7 +14,7 @@ from ..utils import (
 
 
 class WDRIE(InfoExtractor):
-    _CURRENT_MAUS_URL = r'https?://www.wdrmaus.de/aktuelle-sendung/(wdr|index).php5'
+    _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
     _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html'
     _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
 
@@ -92,6 +88,20 @@ class WDRIE(InfoExtractor):
             },
             'skip': 'The id changes from week to week because of the new episode'
         },
+        {
+            'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5',
+            'md5': 'ca365705551e4bd5217490f3b0591290',
+            'info_dict': {
+                'id': 'mdb-186083',
+                'ext': 'flv',
+                'upload_date': '20130919',
+                'title': 'Sachgeschichte - Achterbahn ',
+                'description': '- Die Sendung mit der Maus -',
+            },
+            'params': {
+                'skip_download': True,  # the file has different versions :(
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -222,72 +232,3 @@ class WDRMobileIE(InfoExtractor):
                 'User-Agent': 'mobile',
             },
         }
-
-
-class WDRMausIE(InfoExtractor):
-    _VALID_URL = 'https?://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)((?<!index)\.php5|/(?:$|[?#]))'
-    IE_DESC = 'Sendung mit der Maus'
-    _TESTS = [{
-        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5',
-        'md5': '178b432d002162a14ccb3e0876741095',
-        'info_dict': {
-            'id': 'achterbahn',
-            'ext': 'mp4',
-            'thumbnail': 're:^http://.+\.jpg',
-            'upload_date': '20131001',
-            'title': '19.09.2013 - Achterbahn',
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        webpage = self._download_webpage(url, video_id)
-        param_code = self._html_search_regex(
-            r'<a href="\?startVideo=1&amp;([^"]+)"', webpage, 'parameters')
-
-        title_date = self._search_regex(
-            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
-            webpage, 'air date')
-        title_str = self._html_search_regex(
-            r'<h1>(.*?)</h1>', webpage, 'title')
-        title = '%s - %s' % (title_date, title_str)
-        upload_date = unified_strdate(
-            self._html_search_meta('dc.date', webpage))
-
-        fields = compat_parse_qs(param_code)
-        video_url = fields['firstVideo'][0]
-        thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])
-
-        formats = [{
-            'format_id': 'rtmp',
-            'url': video_url,
-        }]
-
-        jscode = self._download_webpage(
-            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
-            video_id, fatal=False,
-            note='Downloading URL translation table',
-            errnote='Could not download URL translation table')
-        if jscode:
-            for m in re.finditer(
-                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
-                    jscode):
-                if video_url.startswith(m.group('stream')):
-                    http_url = video_url.replace(
-                        m.group('stream'), m.group('dl'))
-                    formats.append({
-                        'format_id': 'http',
-                        'url': http_url,
-                    })
-                    break
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-            'upload_date': upload_date,
-        }

From 3a686853e1739dfc26548cdc09fe89e693e76a9f Mon Sep 17 00:00:00 2001
From: Boris Wachtmeister <boris-code@gmx.com>
Date: Thu, 26 May 2016 20:16:33 +0200
Subject: [PATCH 8/8] [WDR] fixed parsing of playlists

---
 youtube_dl/extractor/wdr.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index 1af1e996d8..1e729cb7cb 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -72,7 +72,7 @@ class WDRIE(InfoExtractor):
         },
         {
             'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
-            'playlist_mincount': 10,
+            'playlist_mincount': 8,
             'info_dict': {
                 'id': 'aktuelle-stunde/aktuelle-stunde-120',
             },
@@ -121,7 +121,7 @@ class WDRIE(InfoExtractor):
             entries = [
                 self.url_result(page_url + href[0], 'WDR')
                 for href in re.findall(
-                    r'<a href="(%s)"' % self._PAGE_REGEX,
+                    r'<a href="(%s)"[^>]+data-extension=' % self._PAGE_REGEX,
                     webpage)
             ]