yt-dlp/yt_dlp/extractor/jditemvideo.py

88 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import json
import random
import time
from .common import InfoExtractor
from ..utils import determine_ext
class JdItemVideoIE(InfoExtractor):
    """Extractor for the main product video embedded in a JD item page.

    The item page is downloaded and searched for a ``mainVideoId``; that id is
    then resolved through the JSONP endpoint in ``_JD_API_VIDEO_CALLBACK_URL``,
    whose payload carries the media URL and its metadata.
    """

    # Host must be a sub-domain of jd.<tld>; dots are escaped so that they only
    # match literal dots, and the host part cannot run across a '/'.
    _VALID_URL = r'https?://(?:[\w-]+\.)+jd\.[a-z.]{2,9}/(?P<id>\d{6,16})\.html'
    IE_NAME = 'jd-video'
    IE_DESC = 'jd-video extractor'
    # Falsy value: no netrc machine name is declared for this extractor.
    _NETRC_MACHINE = False
    # JSONP endpoint; {rand} forms the callback name, {timestamp} is in ms.
    _JD_API_VIDEO_CALLBACK_URL = 'https://cd.jd.com/tencent/video_v3?callback=jQuery{rand}&vid={video_id}&type=1&from=1&appid=24&_={timestamp}'
    _TESTS = [
        {
            'url': 'https://npcitem.jd.hk/100030101538.html',
            'info_dict': {
                "id": "100030101538",
                "ext": "mp4",
                "title": "ipad 2021第九代",
                "description": "【AppleiPad】Apple苹果 iPad 第9代 10.2英寸平板电脑 2021款 ipad964GB WLAN版/A13芯片/1200万像素/iPadOS深空灰色【行情 报价 价格 评测】-京东",
                "size": 10251794,
                "width": 1280,
                "height": 1280,
                "duration": 56,
                "thumbnail": "https://jvod.300hu.com/img/2022/130871763/1/img7.jpg",
                "url": "https://jvod.300hu.com/vod/product/6e02e2d8-98bc-491d-80a1-448ae5ea1c38/c6ef7b9b14ef4b9ca7e4cebda5b7684c.mp4?source=2&h265=h265/18799/a797504bd6f947dfbf6fdb96acfbb55f.mp4",
            },
        },
        {
            # The item id in the URL must equal the expected "id" below,
            # because the extractor takes the id straight from the URL.
            'url': 'https://npcitem.jd.hk/100037516759.html',
            'info_dict': {
                "id": "100037516759",
                "ext": "mp4",
                "title": "RODE Wireless Go II Dual",
                "description": "【RODEWireless Go II Dual】罗德RODEWireless Go II Dual无线领夹麦克风单反手机无线小蜜蜂采访直播vlog收音 一拖二2代 标配【行情 报价 价格 评测】-京东",
                "size": 7547769,
                "width": 1280,
                "height": 720,
                "duration": 60,
                "thumbnail": "https://jvod.300hu.com/img/2022/219535842/1/img7.jpg",
                "url": "https://jvod.300hu.com/vod/product/1fc0661d-546e-446e-a429-a8db696ab06a/4067f4c3bb2d41c5af84081d2b0e3018.mp4?source=2&h265=h265/113074/cf365c28ca3a4fdb8178c4e44f916341.mp4",
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video of the item page at *url*.

        Raises:
            ValueError: if the page has no ``mainVideoId``, if the callback
                response cannot be unwrapped or reports a non-zero ``code``,
                or if it carries no ``playUrl``. ``json.loads`` may also raise
                its own ``ValueError`` subclass on malformed JSON.
        """
        item_id = self._match_id(url=url)
        webpage = self._download_webpage(url_or_request=url, video_id=item_id)

        # Plain _search_regex: the value is a digit string, so no HTML
        # clean-up of the match is wanted.
        main_video_id = self._search_regex(
            pattern=r'"mainVideoId":"(\d+?)"', string=webpage, name='videoId',
            default=None)
        if main_video_id is None:
            raise ValueError("There are no any video. %s" % url)

        description = self._html_extract_title(webpage)

        # Keep the page URL intact; the API URL gets its own name.
        api_url = self._JD_API_VIDEO_CALLBACK_URL.format(
            rand=random.randint(433333, 999999),
            timestamp=int(time.time() * 1000),
            video_id=main_video_id)
        callback_body = self._download_webpage(
            url_or_request=api_url, video_id=item_id)

        # Unwrap the JSONP envelope "jQuery<digits>(...)". The payload is JSON,
        # so it must not go through HTML tag stripping / entity unescaping.
        detail_raw = self._search_regex(
            pattern=r'jQuery\d+\((.+)\)', string=callback_body, name='detail',
            default=None)
        if detail_raw is None:
            # Report what the server actually sent, not the failed match.
            raise ValueError("Callback fail. return: %s" % callback_body)

        detail = json.loads(detail_raw)
        if detail.get("code", -1) != 0:
            raise ValueError("Callback fail. return: %s" % detail_raw)

        play_url = detail.get("playUrl")
        if not play_url:
            # Without a media URL there is nothing to download.
            raise ValueError("Callback fail. return: %s" % detail_raw)

        # "extInfo" may be absent or null; fall back to an empty mapping.
        ext_info = detail.get("extInfo") or {}

        return {
            'id': item_id,
            'ext': determine_ext(url=play_url),
            'title': ext_info.get("videoName") or "unknown_video_title",
            'description': description,
            'size': ext_info.get("size"),
            'width': ext_info.get("vwidth"),
            'height': ext_info.get("vheight"),
            'duration': detail.get("duration"),
            'thumbnail': detail.get("imageUrl"),
            'url': play_url,
        }