From 661cc229d2e885dd303d26535477c8905805ddf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jan 2017 19:08:01 +0700 Subject: [PATCH] [vlive:channel] Improve --- youtube_dl/extractor/vlive.py | 62 ++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 70bab1f046..b9718901b8 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -6,15 +6,19 @@ import time import itertools from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_str, +) from ..utils import ( dict_get, ExtractorError, float_or_none, int_or_none, remove_start, + try_get, urlencode_postdata, ) -from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): @@ -175,9 +179,9 @@ class VLiveIE(InfoExtractor): class VLiveChannelIE(InfoExtractor): IE_NAME = 'vlive:channel' - _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)/video' + _VALID_URL = r'https?://channels\.vlive\.tv/(?P[0-9A-Z]+)' _TEST = { - 'url': 'http://channels.vlive.tv/FCD4B/video', + 'url': 'http://channels.vlive.tv/FCD4B', 'info_dict': { 'id': 'FCD4B', 'title': 'MAMAMOO', @@ -191,21 +195,31 @@ class VLiveChannelIE(InfoExtractor): webpage = self._download_webpage( 'http://channels.vlive.tv/%s/video' % channel_code, channel_code) + + app_id = None + app_js_url = self._search_regex( - r'(http[^\'\"\s]+app\.js)', webpage, 'app js', default='') + r']+src=(["\'])(?Phttp.+?/app\.js.*?)\1', + webpage, 'app js', default=None, group='url') if app_js_url: - app_js = self._download_webpage(app_js_url, channel_code, 'app js') - app_id = self._search_regex( - r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', - app_js, 'app id', default=self._APP_ID) - else: - app_id = self._APP_ID + app_js = self._download_webpage( + app_js_url, channel_code, 'Downloading app JS', fatal=False) + if app_js: + app_id = self._search_regex( + r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]', + app_js, 'app id', default=None) + + app_id = app_id or self._APP_ID channel_info = self._download_json( 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode', - channel_code, note='decode channel code', - query={'app_id': app_id, 'channelCode': channel_code, '_': int(time.time())}) + channel_code, note='Downloading decode channel code', + query={ + 'app_id': app_id, + 'channelCode': channel_code, + '_': int(time.time()) + }) channel_seq = channel_info['result']['channelSeq'] channel_name = None @@ -214,7 +228,7 @@ class VLiveChannelIE(InfoExtractor): for page_num in itertools.count(1): video_list = self._download_json( 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', - channel_code, note='channel list %d' % page_num, + channel_code, note='Downloading channel list page #%d' % page_num, query={ 'app_id': app_id, 'channelSeq': channel_seq, @@ -223,17 +237,27 @@ class VLiveChannelIE(InfoExtractor): 'pageNo': page_num } ) - if not channel_name: - channel_name = video_list['result']['channelInfo']['channelName'] - if not video_list['result'].get('videoList'): + if not channel_name: + channel_name = try_get( + video_list, + lambda x: x['result']['channelInfo']['channelName'], + compat_str) + + videos = try_get( + video_list, lambda x: x['result']['videoList'], list) + if not videos: break - for video in video_list['result']['videoList']: - video_id = str(video['videoSeq']) + for video in videos: + video_id = video.get('videoSeq') + if not video_id: + continue + video_id = compat_str(video_id) entries.append( self.url_result( - 'http://www.vlive.tv/video/%s' % video_id, 'Vlive', video_id)) + 'http://www.vlive.tv/video/%s' % video_id, + ie=VLiveIE.ie_key(), video_id=video_id)) return self.playlist_result( entries, channel_code, channel_name)