1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-10 00:40:10 +01:00

Compare commits

...

23 Commits

Author SHA1 Message Date
bashonly
411ed0af08
[ie/tiktok] Adapt extracted fields and update tests
Authored by: bashonly
2024-02-27 21:24:57 -06:00
bashonly
909c85ec3f
Merge branch 'yt-dlp:master' into cleanup/info-dict-types 2024-02-27 20:41:24 -06:00
114514ns
9ff9466455
[ie/Douyin] Fix extractor (#9239)
Closes #7854, Closes #7941
Authored by: 114514ns, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-02-28 02:30:58 +00:00
marcdumais
e28e135d6f
[ie/altcensored:channel] Fix playlist extraction (#9297)
Authored by: marcdumais
2024-02-25 23:21:08 +00:00
Tobias Gruetzmacher
f1570ab84d
Bugfix for 1713c882730a928ac344c099874d2093fc2c8b51 (#9298)
Authored by: TobiX
2024-02-25 23:11:47 +00:00
pukkandan
069b2aedae
Create ydl._request_director when needed 2024-02-25 06:06:42 +05:30
Simon Sawicki
5eedc208ec
[ie/youtube] Better error when all player responses are skipped (#9083)
Authored by: Grub4K, pukkandan

Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
2024-02-24 23:20:22 +00:00
bashonly
464c919ea8
[ie/CloudflareStream] Improve embed detection (#9287)
Partially addresses #7858
Authored by: bashonly
2024-02-24 23:13:26 +00:00
bashonly
3894ab9574
[ie/archiveorg] Fix format URL encoding (#9279)
Closes #9173
Authored by: bashonly
2024-02-24 23:12:04 +00:00
bashonly
b05640d532
[ie/swearnet] Raise for login required (#9281)
Closes #9110
Authored by: bashonly
2024-02-24 23:11:28 +00:00
bashonly
7a29cbbd5f
[ie/ntvru] Fix extraction (#9276)
Closes #8347
Authored by: bashonly, dirkf

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-02-24 23:10:37 +00:00
bashonly
2e8de097ad
[ie/vimeo] Fix login (#9274)
Closes #9273
Authored by: bashonly
2024-02-24 23:09:04 +00:00
bashonly
f3d5face83
[ie/CloudflareStream] Improve _VALID_URL (#9280)
Closes #9171
Authored by: bashonly
2024-02-24 22:02:13 +00:00
bashonly
eabbccc439
[build] Support failed build job re-runs (#9277)
Authored by: bashonly
2024-02-24 17:00:27 +00:00
sepro
0de09c5b9e
[ie/nebula] Support podcasts (#9140)
Closes #8838
Authored by: seproDev, c-basalt

Co-authored-by: c-basalt <117849907+c-basalt@users.noreply.github.com>
2024-02-24 17:08:47 +01:00
sepro
6a6cdcd182
[core] Warn user when not launching through shell on Windows (#9250)
Authored by: seproDev, Grub4K

Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
2024-02-24 12:58:03 +01:00
J. Gonzalez
998dffb5a2
[ie/cnbc] Overhaul extractors (#8741)
Closes #5871, Closes #8378
Authored by: gonzalezjo, Noor-5, zhijinwuu, ruiminggu, seproDev

Co-authored-by: Noor Mostafa <93787875+Noor-5@users.noreply.github.com>
Co-authored-by: zhijinwuu <zhijinw@andrew.cmu.edu>
Co-authored-by: ruiminggu <ruimingg@andrew.cmu.edu>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-02-23 17:07:35 +01:00
sepro
29a74a6126
[ie/NerdCubedFeed] Overhaul extractor (#9269)
Authored by: seproDev
2024-02-23 16:59:13 +01:00
bashonly
55f1833376
[ie/twitter] Extract numeric channel_id (#9263)
Authored by: bashonly
2024-02-22 00:49:21 +00:00
gmes78
3d9dc2f359
[ie/Rule34Video] Extract creators (#9258)
Authored by: gmes78
2024-02-22 00:48:49 +00:00
bashonly
28e53d60df
[ie/twitter] Extract bitrate for HLS audio formats (#9257)
Closes #9202
Authored by: bashonly
2024-02-21 08:39:10 +00:00
fireattack
f591e605df
[ie/openrec] Pass referer for m3u8 formats (#9253)
Closes #6946
Authored by: fireattack
2024-02-21 03:46:55 +00:00
Jade Laurence Empleo
9a8afadd17
[plugins] Handle PermissionError (#9229)
Authored by: syntaxsurge, pukkandan
2024-02-20 14:37:37 +05:30
22 changed files with 482 additions and 252 deletions

View File

@ -164,7 +164,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
yt-dlp yt-dlp
yt-dlp.tar.gz yt-dlp.tar.gz
@ -227,7 +227,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-linux_${{ matrix.architecture }} name: build-bin-linux_${{ matrix.architecture }}
path: | # run-on-arch-action designates armv7l as armv7 path: | # run-on-arch-action designates armv7l as armv7
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
compression-level: 0 compression-level: 0
@ -271,7 +271,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos dist/yt-dlp_macos
dist/yt-dlp_macos.zip dist/yt-dlp_macos.zip
@ -324,7 +324,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos_legacy dist/yt-dlp_macos_legacy
compression-level: 0 compression-level: 0
@ -373,7 +373,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp.exe dist/yt-dlp.exe
dist/yt-dlp_min.exe dist/yt-dlp_min.exe
@ -421,7 +421,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_x86.exe dist/yt-dlp_x86.exe
compression-level: 0 compression-level: 0
@ -441,7 +441,7 @@ jobs:
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
path: artifact path: artifact
pattern: build-* pattern: build-bin-*
merge-multiple: true merge-multiple: true
- name: Make SHA2-SUMS files - name: Make SHA2-SUMS files
@ -484,3 +484,4 @@ jobs:
_update_spec _update_spec
SHA*SUMS* SHA*SUMS*
compression-level: 0 compression-level: 0
overwrite: true

View File

@ -690,7 +690,6 @@ class YoutubeDL:
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers')) self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
self.params['http_headers'].pop('Cookie', None) self.params['http_headers'].pop('Cookie', None)
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
if auto_init and auto_init != 'no_verbose_header': if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header() self.print_debug_header()
@ -964,6 +963,7 @@ class YoutubeDL:
def close(self): def close(self):
self.save_cookies() self.save_cookies()
self._request_director.close() self._request_director.close()
del self._request_director
def trouble(self, message=None, tb=None, is_error=True): def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears. """Determine action to take when a download problem appears.
@ -4160,6 +4160,10 @@ class YoutubeDL:
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0) director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
return director return director
@functools.cached_property
def _request_director(self):
    """Request director for this instance, built on first access and then cached."""
    handlers = _REQUEST_HANDLERS.values()
    return self.build_request_director(handlers, _RH_PREFERENCES)
def encode(self, s): def encode(self, s):
if isinstance(s, bytes): if isinstance(s, bytes):
return s # Already encoded return s # Already encoded

View File

@ -14,7 +14,7 @@ import os
import re import re
import traceback import traceback
from .compat import compat_shlex_quote from .compat import compat_os_name, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
@ -984,7 +984,28 @@ def _real_main(argv=None):
if pre_process: if pre_process:
return ydl._download_retcode return ydl._download_retcode
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) args = sys.argv[1:] if argv is None else argv
ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes
import msvcrt
kernel32 = ctypes.WinDLL('Kernel32')
buffer = (1 * ctypes.wintypes.DWORD)()
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
# If we only have a single process attached, then the executable was double clicked
# When using `pyinstaller` with `--onefile`, two processes get attached
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
if attached_processes == 1 or is_onefile and attached_processes == 2:
print(parser._generate_error_message(
'Do not double-click the executable, instead call it from a command line.\n'
'Please read the README for further information on how to use yt-dlp: '
'https://github.com/yt-dlp/yt-dlp#readme'))
msvcrt.getch()
_exit(2)
parser.error( parser.error(
'You must provide at least one URL.\n' 'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.') 'Type yt-dlp --help to see a list of all options.')

View File

@ -379,7 +379,6 @@ from .clubic import ClubicIE
from .clyp import ClypIE from .clyp import ClypIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnbc import ( from .cnbc import (
CNBCIE,
CNBCVideoIE, CNBCVideoIE,
) )
from .cnn import ( from .cnn import (

View File

@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?", 'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
'display_id': 'k0srjLSkga8.webm', 'display_id': 'k0srjLSkga8.webm',
'release_date': '20180403', 'release_date': '20180403',
'creator': 'Virginie Vota', 'creators': ['Virginie Vota'],
'release_year': 2018, 'release_year': 2018,
'upload_date': '20230318', 'upload_date': '20230318',
'uploader': 'admin@altcensored.com', 'uploader': 'admin@altcensored.com',
@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
'duration': 926.09, 'duration': 926.09,
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg', 'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
'view_count': int, 'view_count': int,
'categories': ['News & Politics'], 'categories': ['News & Politics'], # FIXME
} }
}] }]
@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
'title': 'Virginie Vota', 'title': 'Virginie Vota',
'id': 'UCFPTO55xxHqFqkzRZHu4kcw', 'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
}, },
'playlist_count': 91 'playlist_count': 85,
}, { }, {
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw', 'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
'info_dict': { 'info_dict': {
'title': 'yukikaze775', 'title': 'yukikaze775',
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw', 'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
}, },
'playlist_count': 4 'playlist_count': 4,
}, {
'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
'info_dict': {
'title': 'Mister Metokur',
'id': 'UCfYbb7nga6-icsFWWgS-kWw',
},
'playlist_count': 121,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -78,7 +85,7 @@ class AltCensoredChannelIE(InfoExtractor):
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage') url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False) title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
page_count = int_or_none(self._html_search_regex( page_count = int_or_none(self._html_search_regex(
r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>', r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
webpage, 'page count', default='1')) webpage, 'page count', default='1'))
def page_func(page_num): def page_func(page_num):

View File

@ -301,7 +301,7 @@ class ArchiveOrgIE(InfoExtractor):
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig')) is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in): if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
entry['formats'].append({ entry['formats'].append({
'url': 'https://archive.org/download/' + identifier + '/' + f['name'], 'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
'format': f.get('format'), 'format': f.get('format'),
'width': int_or_none(f.get('width')), 'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')), 'height': int_or_none(f.get('height')),

View File

@ -1996,7 +1996,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'title': get_element_by_class( 'title': get_element_by_class(
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage), 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
'description': get_element_by_class( 'description': get_element_by_class(
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'), 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
}, self._search_json_ld(webpage, video_id, default={})) }, self._search_json_ld(webpage, video_id, default={}))
def _get_comments_reply(self, root_id, next_id=0, display_id=None): def _get_comments_reply(self, root_id, next_id=0, display_id=None):

View File

@ -4,27 +4,25 @@ from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor): class CloudflareStreamIE(InfoExtractor):
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = r'''(?x) _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
https?:// _EMBED_REGEX = [
(?: rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
(?:watch\.)?%s/| rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
%s ]
)
(?P<id>%s)
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': { 'info_dict': {
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': 'm3u8',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
}, { }, {
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
'info_dict': {
'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
},
'params': {
'skip_download': 'm3u8',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,68 +1,97 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class CNBCIE(InfoExtractor):
_VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://video.cnbc.com/gallery/?video=3000503714',
'info_dict': {
'id': '3000503714',
'ext': 'mp4',
'title': 'Fighting zombies is big business',
'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
'timestamp': 1459332000,
'upload_date': '20160330',
'uploader': 'NBCU-CNBC',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Dead link',
}
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(
'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
{'force_smil_url': True}),
'id': video_id,
}
class CNBCVideoIE(InfoExtractor): class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)' _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', _TESTS = [{
'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
'info_dict': { 'info_dict': {
'id': '7000031301',
'ext': 'mp4', 'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates", 'id': '107344774',
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
'timestamp': 1531958400, 'modified_timestamp': 1702053483,
'upload_date': '20180719', 'timestamp': 1701977810,
'uploader': 'NBCU-CNBC', 'channel': 'News Videos',
'upload_date': '20231207',
'description': 'md5:882c001d85cb43d7579b514307b3e78b',
'release_timestamp': 1701977375,
'modified_date': '20231208',
'release_date': '20231207',
'duration': 65,
'author': 'Sean Conlon',
'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
}, },
'params': { 'expected_warnings': ['Unable to download f4m manifest'],
'skip_download': True, }, {
'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
'info_dict': {
'author': 'Jim Cramer',
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 299.0,
'ext': 'mp4',
'id': '107345451',
'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
'timestamp': 1702080139,
'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
'release_date': '20231208',
'upload_date': '20231209',
'modified_timestamp': 1702080139,
'modified_date': '20231209',
'release_timestamp': 1702073551,
}, },
'skip': 'Dead link', 'expected_warnings': ['Unable to download f4m manifest'],
} }, {
'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
'info_dict': {
'author': 'Jim Cramer',
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 113.0,
'ext': 'mp4',
'id': '107345474',
'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
'timestamp': 1702080535,
'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
'release_timestamp': 1702077347,
'modified_timestamp': 1702080535,
'release_date': '20231208',
'upload_date': '20231209',
'modified_date': '20231209',
},
'expected_warnings': ['Unable to download f4m manifest'],
}]
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = self._match_valid_url(url).groups() display_id = self._match_id(url)
video_id = self._download_json( webpage = self._download_webpage(url, display_id)
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
'query': '''{
page(path: "%s") { player_data = traverse_obj(data, (
vcpsId 'page', 'page', 'layout', ..., 'columns', ..., 'modules',
} lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
}''' % path,
})['data']['page']['vcpsId'] return {
return self.url_result( 'id': display_id,
'http://video.cnbc.com/gallery/?video=%d' % video_id, 'display_id': display_id,
CNBCIE.ie_key()) 'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
**self._search_json_ld(webpage, display_id, fatal=False),
**traverse_obj(player_data, {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'author': ('author', ..., 'name', {str}),
'timestamp': ('datePublished', {parse_iso8601}),
'release_timestamp': ('uploadDate', {parse_iso8601}),
'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
'thumbnail': ('thumbnail', {url_or_none}),
'duration': ('duration', {int_or_none}),
'channel': ('section', 'title', {str}),
}, get_all=False),
}

View File

@ -1,6 +1,7 @@
import itertools import itertools
import json import json
from .art19 import Art19IE
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
class NebulaIE(NebulaBaseIE): class NebulaIE(NebulaBaseIE):
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)' IE_NAME = 'nebula:video'
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast', 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'info_dict': { 'info_dict': {
@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
class NebulaClassIE(NebulaBaseIE): class NebulaClassIE(NebulaBaseIE):
IE_NAME = 'nebula:class' IE_NAME = 'nebula:media'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)' _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14', 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
'info_dict': { 'info_dict': {
@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
'title': 'Photos, Sculpture, and Video', 'title': 'Photos, Sculpture, and Video',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
'info_dict': {
'ext': 'mp3',
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
'series_id': '335e8159-d663-491a-888f-1732285706ac',
'modified_timestamp': 1599091504,
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'series': 'Extremities',
'modified_date': '20200903',
'upload_date': '20200902',
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
'release_timestamp': 1571237958,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'duration': 1546.05714,
'timestamp': 1599085608,
'release_date': '20191016',
},
}, {
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
'info_dict': {
'ext': 'mp3',
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'episode_number': 1,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20230304',
'modified_date': '20230403',
'series': 'The Layover',
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'modified_timestamp': 1680554566,
'duration': 3130.46401,
'release_timestamp': 1677943800,
'title': 'The Layover — Episode 1',
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
'upload_date': '20230303',
'episode': 'Episode 1',
'timestamp': 1677883672,
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
metadata = self._call_api( metadata = self._call_api(
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons', f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
slug, note='Fetching video metadata') slug, note='Fetching class/podcast metadata')
return { content_type = metadata.get('type')
**self._extract_video_metadata(metadata), if content_type == 'lesson':
**self._extract_formats(metadata['id'], slug), return {
} **self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
elif content_type == 'podcast_episode':
episode_url = metadata['episode_url']
if not episode_url and metadata.get('premium'):
self.raise_login_required()
if Art19IE.suitable(episode_url):
return self.url_result(episode_url, Art19IE)
return traverse_obj(metadata, {
'id': ('id', {str}),
'url': ('episode_url', {url_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('published_at', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'channel_id': ('channel_id', {str}),
'channel': ('channel_title', {str}),
'thumbnail': ('assets', 'regular', {url_or_none}),
})
raise ExtractorError(f'Unexpected content type {content_type!r}')
class NebulaSubscriptionsIE(NebulaBaseIE): class NebulaSubscriptionsIE(NebulaBaseIE):
IE_NAME = 'nebula:subscriptions' IE_NAME = 'nebula:subscriptions'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)' _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/myshows', 'url': 'https://nebula.tv/myshows',
'playlist_mincount': 1, 'playlist_mincount': 1,
@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
class NebulaChannelIE(NebulaBaseIE): class NebulaChannelIE(NebulaBaseIE):
IE_NAME = 'nebula:channel' IE_NAME = 'nebula:channel'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])' _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/tom-scott-presents-money', 'url': 'https://nebula.tv/tom-scott-presents-money',
'info_dict': { 'info_dict': {
@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
'description': 'md5:6690248223eed044a9f11cd5a24f9742', 'description': 'md5:6690248223eed044a9f11cd5a24f9742',
}, },
'playlist_count': 23, 'playlist_count': 23,
}, {
'url': 'https://nebula.tv/trussissuespodcast',
'info_dict': {
'id': 'trussissuespodcast',
'title': 'The TLDR News Podcast',
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
},
'playlist_mincount': 80,
}] }]
def _generate_playlist_entries(self, collection_id, collection_slug): def _generate_playlist_entries(self, collection_id, collection_slug):
@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}', lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata) {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
def _generate_podcast_entries(self, collection_id, collection_slug):
    """Yield a url_result for each podcast episode of a channel, page by page.

    The first page is the channel's podcast_episodes endpoint; every following
    page is taken from the 'next' field of the previous response, and iteration
    stops once that field is empty. Only results whose 'share_url' passes
    url_or_none are yielded.
    """
    page_url = (
        f'https://content.api.nebula.app/podcast_channels/{collection_id}'
        '/podcast_episodes/?ordering=-published_at&premium=true')
    page_num = 1
    while page_url:
        page = self._call_api(
            page_url, collection_slug, note=f'Retrieving podcast page {page_num}')
        for item in traverse_obj(page, ('results', lambda _, v: url_or_none(v['share_url']))):
            yield self.url_result(item['share_url'], NebulaClassIE)
        # The API response names the following page itself; a falsy value ends the loop
        page_url = page.get('next')
        page_num += 1
def _real_extract(self, url): def _real_extract(self, url):
collection_slug = self._match_id(url) collection_slug = self._match_id(url)
channel = self._call_api( channel = self._call_api(
@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
if channel.get('type') == 'class': if channel.get('type') == 'class':
entries = self._generate_class_entries(channel) entries = self._generate_class_entries(channel)
elif channel.get('type') == 'podcast_channel':
entries = self._generate_podcast_entries(channel['id'], collection_slug)
else: else:
entries = self._generate_playlist_entries(channel['id'], collection_slug) entries = self._generate_playlist_entries(channel['id'], collection_slug)

View File

@ -1,33 +1,38 @@
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NerdCubedFeedIE(InfoExtractor): class NerdCubedFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json' _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json', 'url': 'http://www.nerdcubed.co.uk/',
'info_dict': { 'info_dict': {
'id': 'nerdcubed-feed', 'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed', 'title': 'nerdcubed.co.uk feed',
}, },
'playlist_mincount': 1300, 'playlist_mincount': 5500,
} }
def _extract_video(self, feed_entry):
    """Build a url_transparent result pointing at the YouTube video of one feed entry.

    The entry's 'id' is used as the YouTube video ID; the remaining fields are
    picked out of the entry with traverse_obj and passed along as metadata.
    """
    video_url = f'https://www.youtube.com/watch?v={feed_entry["id"]}'
    metadata = traverse_obj(feed_entry, {
        'id': ('id', {str}),
        'title': ('title', {str}),
        'description': ('description', {str}),
        'timestamp': ('publishedAt', {parse_iso8601}),
        'channel': ('source', 'name', {str}),
        'channel_id': ('source', 'id', {str}),
        'channel_url': ('source', 'url', {str}),
        'thumbnail': ('thumbnail', 'source', {url_or_none}),
    })
    return self.url_result(video_url, YoutubeIE, **metadata, url_transparent=True)
def _real_extract(self, url): def _real_extract(self, url):
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed') video_id = 'nerdcubed-feed'
feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
entries = [{ return self.playlist_result(
'_type': 'url', map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
'title': feed_entry['title'], video_id, 'nerdcubed.co.uk feed')
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
} for feed_entry in feed]
return {
'_type': 'playlist',
'title': 'nerdcubed.co.uk feed',
'id': 'nerdcubed-feed',
'entries': entries,
}

View File

@ -35,6 +35,7 @@ class NTVRuIE(InfoExtractor):
'duration': 172, 'duration': 172,
'view_count': int, 'view_count': int,
}, },
'skip': '404 Not Found',
}, { }, {
'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
'md5': '82dbd49b38e3af1d00df16acbeab260c', 'md5': '82dbd49b38e3af1d00df16acbeab260c',
@ -78,7 +79,8 @@ class NTVRuIE(InfoExtractor):
}] }]
_VIDEO_ID_REGEXES = [ _VIDEO_ID_REGEXES = [
r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)', r'<meta property="og:url" content="https?://www\.ntv\.ru/video/(\d+)',
r'<meta property="og:video:(?:url|iframe)" content="https?://www\.ntv\.ru/embed/(\d+)',
r'<video embed=[^>]+><id>(\d+)</id>', r'<video embed=[^>]+><id>(\d+)</id>',
r'<video restriction[^>]+><key>(\d+)</key>', r'<video restriction[^>]+><key>(\d+)</key>',
] ]

View File

@ -12,6 +12,8 @@ from ..compat import compat_str
class OpenRecBaseIE(InfoExtractor): class OpenRecBaseIE(InfoExtractor):
_M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
def _extract_pagestore(self, webpage, video_id): def _extract_pagestore(self, webpage, video_id):
return self._parse_json( return self._parse_json(
self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id) self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
@ -21,7 +23,7 @@ class OpenRecBaseIE(InfoExtractor):
if not m3u8_url: if not m3u8_url:
continue continue
yield from self._extract_m3u8_formats( yield from self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_id=name) m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
def _extract_movie(self, webpage, video_id, name, is_live): def _extract_movie(self, webpage, video_id, name, is_live):
window_stores = self._extract_pagestore(webpage, video_id) window_stores = self._extract_pagestore(webpage, video_id)
@ -60,6 +62,7 @@ class OpenRecBaseIE(InfoExtractor):
'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')), 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')), 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
'is_live': is_live, 'is_live': is_live,
'http_headers': self._M3U8_HEADERS,
} }
@ -110,7 +113,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
raise ExtractorError('Cannot extract title') raise ExtractorError('Cannot extract title')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4') capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
return { return {
'id': video_id, 'id': video_id,
@ -121,6 +124,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str), 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str), 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
'upload_date': unified_strdate(capture_data.get('createdAt')), 'upload_date': unified_strdate(capture_data.get('createdAt')),
'http_headers': self._M3U8_HEADERS,
} }

View File

@ -9,7 +9,6 @@ from ..utils import (
get_element_html_by_class, get_element_html_by_class,
get_elements_by_class, get_elements_by_class,
int_or_none, int_or_none,
join_nonempty,
parse_count, parse_count,
parse_duration, parse_duration,
unescapeHTML, unescapeHTML,
@ -57,7 +56,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'timestamp': 1640131200, 'timestamp': 1640131200,
'description': '', 'description': '',
'creator': 'WildeerStudio', 'creators': ['WildeerStudio'],
'upload_date': '20211222', 'upload_date': '20211222',
'uploader': 'CerZule', 'uploader': 'CerZule',
'uploader_url': 'https://rule34video.com/members/36281/', 'uploader_url': 'https://rule34video.com/members/36281/',
@ -81,13 +80,13 @@ class Rule34VideoIE(InfoExtractor):
'quality': quality, 'quality': quality,
}) })
categories, creator, uploader, uploader_url = [None] * 4 categories, creators, uploader, uploader_url = [None] * 4
for col in get_elements_by_class('col', webpage): for col in get_elements_by_class('col', webpage):
label = clean_html(get_element_by_class('label', col)) label = clean_html(get_element_by_class('label', col))
if label == 'Categories:': if label == 'Categories:':
categories = list(map(clean_html, get_elements_by_class('item', col))) categories = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Artist:': elif label == 'Artist:':
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ') creators = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Uploaded By:': elif label == 'Uploaded By:':
uploader = clean_html(get_element_by_class('name', col)) uploader = clean_html(get_element_by_class('name', col))
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href') uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
@ -115,7 +114,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int_or_none(self._search_regex( 'comment_count': int_or_none(self._search_regex(
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)), r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
'age_limit': 18, 'age_limit': 18,
'creator': creator, 'creators': creators,
'uploader': uploader, 'uploader': uploader,
'uploader_url': uploader_url, 'uploader_url': uploader_url,
'categories': categories, 'categories': categories,

View File

@ -1,5 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj from ..utils import ExtractorError, int_or_none, traverse_obj
class SwearnetEpisodeIE(InfoExtractor): class SwearnetEpisodeIE(InfoExtractor):
@ -51,7 +51,13 @@ class SwearnetEpisodeIE(InfoExtractor):
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num') display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid') try:
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
except ExtractorError:
if 'Upgrade Now' in webpage:
self.raise_login_required()
raise
json_data = self._download_json( json_data = self._download_json(
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0] f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]

View File

@ -6,7 +6,7 @@ import string
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse from ..compat import compat_urllib_parse_urlparse
from ..networking import HEADRequest from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@ -15,7 +15,6 @@ from ..utils import (
UserNotLive, UserNotLive,
determine_ext, determine_ext,
format_field, format_field,
get_first,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
merge_dicts, merge_dicts,
@ -219,8 +218,8 @@ class TikTokBaseIE(InfoExtractor):
def extract_addr(addr, add_meta={}): def extract_addr(addr, add_meta={}):
parsed_meta, res = parse_url_key(addr.get('url_key', '')) parsed_meta, res = parse_url_key(addr.get('url_key', ''))
if res: if res:
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height')) known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width')) known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
parsed_meta.update(known_resolutions.get(res, {})) parsed_meta.update(known_resolutions.get(res, {}))
add_meta.setdefault('height', int_or_none(res[:-1])) add_meta.setdefault('height', int_or_none(res[:-1]))
return [{ return [{
@ -237,22 +236,26 @@ class TikTokBaseIE(InfoExtractor):
# Hack: Add direct video links first to prioritize them when removing duplicate formats # Hack: Add direct video links first to prioritize them when removing duplicate formats
formats = [] formats = []
width = int_or_none(video_info.get('width'))
height = int_or_none(video_info.get('height'))
if video_info.get('play_addr'): if video_info.get('play_addr'):
formats.extend(extract_addr(video_info['play_addr'], { formats.extend(extract_addr(video_info['play_addr'], {
'format_id': 'play_addr', 'format_id': 'play_addr',
'format_note': 'Direct video', 'format_note': 'Direct video',
'vcodec': 'h265' if traverse_obj( 'vcodec': 'h265' if traverse_obj(
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002 video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
'width': video_info.get('width'), 'width': width,
'height': video_info.get('height'), 'height': height,
})) }))
if video_info.get('download_addr'): if video_info.get('download_addr'):
formats.extend(extract_addr(video_info['download_addr'], { download_addr = video_info['download_addr']
dl_width = int_or_none(download_addr.get('width'))
formats.extend(extract_addr(download_addr, {
'format_id': 'download_addr', 'format_id': 'download_addr',
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''), 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
'vcodec': 'h264', 'vcodec': 'h264',
'width': video_info.get('width'), 'width': dl_width or width,
'height': video_info.get('height'), 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
'preference': -2 if video_info.get('has_watermark') else -1, 'preference': -2 if video_info.get('has_watermark') else -1,
})) }))
if video_info.get('play_addr_h264'): if video_info.get('play_addr_h264'):
@ -311,7 +314,7 @@ class TikTokBaseIE(InfoExtractor):
if is_generic_og_trackname: if is_generic_og_trackname:
music_track, music_author = contained_music_track or 'original sound', contained_music_author music_track, music_author = contained_music_track or 'original sound', contained_music_author
else: else:
music_track, music_author = music_info.get('title'), music_info.get('author') music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str}))
return { return {
'id': aweme_id, 'id': aweme_id,
@ -330,15 +333,16 @@ class TikTokBaseIE(InfoExtractor):
'comment_count': 'comment_count', 'comment_count': 'comment_count',
}, expected_type=int_or_none), }, expected_type=int_or_none),
**traverse_obj(author_info, { **traverse_obj(author_info, {
'uploader': 'unique_id', 'uploader': ('unique_id', {str}),
'uploader_id': 'uid', 'uploader_id': ('uid', {str_or_none}),
'creator': 'nickname', 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
'channel_id': 'sec_uid', 'channel': ('nickname', {str}),
}, expected_type=str_or_none), 'channel_id': ('sec_uid', {str}),
}),
'uploader_url': user_url, 'uploader_url': user_url,
'track': music_track, 'track': music_track,
'album': str_or_none(music_info.get('album')) or None, 'album': str_or_none(music_info.get('album')) or None,
'artist': music_author or None, 'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(aweme_detail, aweme_id), 'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
@ -399,7 +403,8 @@ class TikTokBaseIE(InfoExtractor):
'timestamp': ('createTime', {int_or_none}), 'timestamp': ('createTime', {int_or_none}),
}), }),
**traverse_obj(author_info or aweme_detail, { **traverse_obj(author_info or aweme_detail, {
'creator': ('nickname', {str}), 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
'channel': ('nickname', {str}),
'uploader': (('uniqueId', 'author'), {str}), 'uploader': (('uniqueId', 'author'), {str}),
'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}), 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
}, get_all=False), }, get_all=False),
@ -410,10 +415,10 @@ class TikTokBaseIE(InfoExtractor):
'comment_count': 'commentCount', 'comment_count': 'commentCount',
}, expected_type=int_or_none), }, expected_type=int_or_none),
**traverse_obj(music_info, { **traverse_obj(music_info, {
'track': 'title', 'track': ('title', {str}),
'album': ('album', {lambda x: x or None}), 'album': ('album', {str}, {lambda x: x or None}),
'artist': 'authorName', 'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
}, expected_type=str), }),
'channel_id': channel_id, 'channel_id': channel_id,
'uploader_url': user_url, 'uploader_url': user_url,
'formats': formats, 'formats': formats,
@ -470,7 +475,8 @@ class TikTokIE(TikTokBaseIE):
'uploader_id': '18702747', 'uploader_id': '18702747',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
'creator': 'patroX', 'channel': 'patroX',
'creators': ['patroX'],
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'upload_date': '20190930', 'upload_date': '20190930',
'timestamp': 1569860870, 'timestamp': 1569860870,
@ -478,7 +484,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson', 'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'],
'track': 'Big Fun', 'track': 'Big Fun',
}, },
}, { }, {
@ -490,12 +496,13 @@ class TikTokIE(TikTokBaseIE):
'title': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥', 'title': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥', 'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
'uploader': 'barudakhb_', 'uploader': 'barudakhb_',
'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6', 'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
'uploader_id': '6974687867511718913', 'uploader_id': '6974687867511718913',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'track': 'Boka Dance', 'track': 'Boka Dance',
'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6', 'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
'timestamp': 1626121503, 'timestamp': 1626121503,
'duration': 18, 'duration': 18,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
@ -514,7 +521,8 @@ class TikTokIE(TikTokBaseIE):
'title': 'Slap and Run!', 'title': 'Slap and Run!',
'description': 'Slap and Run!', 'description': 'Slap and Run!',
'uploader': 'user440922249', 'uploader': 'user440922249',
'creator': 'Slap And Run', 'channel': 'Slap And Run',
'creators': ['Slap And Run'],
'uploader_id': '7036055384943690754', 'uploader_id': '7036055384943690754',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', 'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
@ -538,7 +546,8 @@ class TikTokIE(TikTokBaseIE):
'title': 'TikTok video #7059698374567611694', 'title': 'TikTok video #7059698374567611694',
'description': '', 'description': '',
'uploader': 'pokemonlife22', 'uploader': 'pokemonlife22',
'creator': 'Pokemon', 'channel': 'Pokemon',
'creators': ['Pokemon'],
'uploader_id': '6820838815978423302', 'uploader_id': '6820838815978423302',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
@ -547,7 +556,7 @@ class TikTokIE(TikTokBaseIE):
'duration': 6, 'duration': 6,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'upload_date': '20220201', 'upload_date': '20220201',
'artist': 'Pokemon', 'artists': ['Pokemon'],
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
@ -584,12 +593,13 @@ class TikTokIE(TikTokBaseIE):
'ext': 'mp3', 'ext': 'mp3',
'title': 'TikTok video #7139980461132074283', 'title': 'TikTok video #7139980461132074283',
'description': '', 'description': '',
'creator': 'Antaura', 'channel': 'Antaura',
'creators': ['Antaura'],
'uploader': '_le_cannibale_', 'uploader': '_le_cannibale_',
'uploader_id': '6604511138619654149', 'uploader_id': '6604511138619654149',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', 'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'artist': 'nathan !', 'artists': ['nathan !'],
'track': 'grahamscott canon', 'track': 'grahamscott canon',
'upload_date': '20220905', 'upload_date': '20220905',
'timestamp': 1662406249, 'timestamp': 1662406249,
@ -597,23 +607,24 @@ class TikTokIE(TikTokBaseIE):
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
'thumbnail': r're:^https://.+\.webp', 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
}, },
}, { }, {
# only available via web # only available via web
'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME
'md5': '6aba7fad816e8709ff2c149679ace165', 'md5': '6aba7fad816e8709ff2c149679ace165',
'info_dict': { 'info_dict': {
'id': '7206382937372134662', 'id': '7206382937372134662',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a', 'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a', 'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
'creator': 'MoxyPatch', 'channel': 'MoxyPatch',
'creators': ['MoxyPatch'],
'uploader': 'moxypatch', 'uploader': 'moxypatch',
'uploader_id': '7039142049363379205', 'uploader_id': '7039142049363379205',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', 'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
'artist': 'your worst nightmare', 'artists': ['your worst nightmare'],
'track': 'original sound', 'track': 'original sound',
'upload_date': '20230303', 'upload_date': '20230303',
'timestamp': 1677866781, 'timestamp': 1677866781,
@ -628,7 +639,7 @@ class TikTokIE(TikTokBaseIE):
'expected_warnings': ['Unable to find video in feed'], 'expected_warnings': ['Unable to find video in feed'],
}, { }, {
# 1080p format # 1080p format
'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME
'md5': '982512017a8a917124d5a08c8ae79621', 'md5': '982512017a8a917124d5a08c8ae79621',
'info_dict': { 'info_dict': {
'id': '7107337212743830830', 'id': '7107337212743830830',
@ -639,8 +650,9 @@ class TikTokIE(TikTokBaseIE):
'uploader_id': '86328792343818240', 'uploader_id': '86328792343818240',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', 'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'creator': 'tate mcrae', 'channel': 'tate mcrae',
'artist': 'tate mcrae', 'creators': ['tate mcrae'],
'artists': ['tate mcrae'],
'track': 'original sound', 'track': 'original sound',
'upload_date': '20220609', 'upload_date': '20220609',
'timestamp': 1654805899, 'timestamp': 1654805899,
@ -652,6 +664,7 @@ class TikTokIE(TikTokBaseIE):
'thumbnail': r're:^https://.+\.webp', 'thumbnail': r're:^https://.+\.webp',
}, },
'params': {'format': 'bytevc1_1080p_808907-0'}, 'params': {'format': 'bytevc1_1080p_808907-0'},
'expected_warnings': ['Unable to find video in feed'],
}, { }, {
# Slideshow, audio-only m4a format # Slideshow, audio-only m4a format
'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594', 'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594',
@ -665,8 +678,9 @@ class TikTokIE(TikTokBaseIE):
'uploader_id': '6582536342634676230', 'uploader_id': '6582536342634676230',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
'channel_id': 'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', 'channel_id': 'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
'creator': 'лампочка', 'channel': 'лампочка',
'artist': 'Øneheart', 'creators': ['лампочка'],
'artists': ['Øneheart'],
'album': 'watching the stars', 'album': 'watching the stars',
'track': 'watching the stars', 'track': 'watching the stars',
'upload_date': '20230708', 'upload_date': '20230708',
@ -675,7 +689,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'thumbnail': r're:^https://.+\.webp', 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
}, },
}, { }, {
# Auto-captions available # Auto-captions available
@ -921,20 +935,23 @@ class DouyinIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.douyin.com/video/6961737553342991651', 'url': 'https://www.douyin.com/video/6961737553342991651',
'md5': 'a97db7e3e67eb57bf40735c022ffa228', 'md5': '9ecce7bc5b302601018ecb2871c63a75',
'info_dict': { 'info_dict': {
'id': '6961737553342991651', 'id': '6961737553342991651',
'ext': 'mp4', 'ext': 'mp4',
'title': '#杨超越 小小水手带你去远航❤️', 'title': '#杨超越 小小水手带你去远航❤️',
'description': '#杨超越 小小水手带你去远航❤️', 'description': '#杨超越 小小水手带你去远航❤️',
'uploader': '6897520xka',
'uploader_id': '110403406559', 'uploader_id': '110403406559',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'creator': '杨超越', 'channel': '杨超越',
'duration': 19782, 'creators': ['杨超越'],
'duration': 19,
'timestamp': 1620905839, 'timestamp': 1620905839,
'upload_date': '20210513', 'upload_date': '20210513',
'track': '@杨超越创作的原声', 'track': '@杨超越创作的原声',
'artists': ['杨超越'],
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
@ -943,20 +960,23 @@ class DouyinIE(TikTokBaseIE):
}, },
}, { }, {
'url': 'https://www.douyin.com/video/6982497745948921092', 'url': 'https://www.douyin.com/video/6982497745948921092',
'md5': '34a87ebff3833357733da3fe17e37c0e', 'md5': '15c5e660b7048af3707304e3cc02bbb5',
'info_dict': { 'info_dict': {
'id': '6982497745948921092', 'id': '6982497745948921092',
'ext': 'mp4', 'ext': 'mp4',
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想', 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想', 'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
'uploader': '0731chaoyue',
'uploader_id': '408654318141572', 'uploader_id': '408654318141572',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
'creator': '杨超越工作室', 'channel': '杨超越工作室',
'duration': 42479, 'creators': ['杨超越工作室'],
'duration': 42,
'timestamp': 1625739481, 'timestamp': 1625739481,
'upload_date': '20210708', 'upload_date': '20210708',
'track': '@杨超越工作室创作的原声', 'track': '@杨超越工作室创作的原声',
'artists': ['杨超越工作室'],
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
@ -965,20 +985,23 @@ class DouyinIE(TikTokBaseIE):
}, },
}, { }, {
'url': 'https://www.douyin.com/video/6953975910773099811', 'url': 'https://www.douyin.com/video/6953975910773099811',
'md5': 'dde3302460f19db59c47060ff013b902', 'md5': '0e6443758b8355db9a3c34864a4276be',
'info_dict': { 'info_dict': {
'id': '6953975910773099811', 'id': '6953975910773099811',
'ext': 'mp4', 'ext': 'mp4',
'title': '#一起看海 出现在你的夏日里', 'title': '#一起看海 出现在你的夏日里',
'description': '#一起看海 出现在你的夏日里', 'description': '#一起看海 出现在你的夏日里',
'uploader': '6897520xka',
'uploader_id': '110403406559', 'uploader_id': '110403406559',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'creator': '杨超越', 'channel': '杨超越',
'duration': 17343, 'creators': ['杨超越'],
'duration': 17,
'timestamp': 1619098692, 'timestamp': 1619098692,
'upload_date': '20210422', 'upload_date': '20210422',
'track': '@杨超越创作的原声', 'track': '@杨超越创作的原声',
'artists': ['杨超越'],
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
@ -1004,20 +1027,23 @@ class DouyinIE(TikTokBaseIE):
'skip': 'No longer available', 'skip': 'No longer available',
}, { }, {
'url': 'https://www.douyin.com/video/6963263655114722595', 'url': 'https://www.douyin.com/video/6963263655114722595',
'md5': 'cf9f11f0ec45d131445ec2f06766e122', 'md5': '1440bcf59d8700f8e014da073a4dfea8',
'info_dict': { 'info_dict': {
'id': '6963263655114722595', 'id': '6963263655114722595',
'ext': 'mp4', 'ext': 'mp4',
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈', 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈', 'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
'uploader': '6897520xka',
'uploader_id': '110403406559', 'uploader_id': '110403406559',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'creator': '杨超越', 'channel': '杨超越',
'duration': 15115, 'creators': ['杨超越'],
'duration': 15,
'timestamp': 1621261163, 'timestamp': 1621261163,
'upload_date': '20210517', 'upload_date': '20210517',
'track': '@杨超越创作的原声', 'track': '@杨超越创作的原声',
'artists': ['杨超越'],
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'repost_count': int, 'repost_count': int,
@ -1025,34 +1051,23 @@ class DouyinIE(TikTokBaseIE):
'thumbnail': r're:https?://.+\.jpe?g', 'thumbnail': r're:https?://.+\.jpe?g',
}, },
}] }]
_APP_VERSIONS = [('23.3.0', '230300')]
_APP_NAME = 'aweme'
_AID = 1128
_API_HOSTNAME = 'aweme.snssdk.com'
_UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s' _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
_WEBPAGE_HOST = 'https://www.douyin.com/' _WEBPAGE_HOST = 'https://www.douyin.com/'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
try: detail = traverse_obj(self._download_json(
return self._extract_aweme_app(video_id) 'https://www.douyin.com/aweme/v1/web/aweme/detail/', video_id,
except ExtractorError as e: 'Downloading web detail JSON', 'Failed to download web detail JSON',
e.expected = True query={'aweme_id': video_id}, fatal=False), ('aweme_detail', {dict}))
self.to_screen(f'{e}; trying with webpage') if not detail:
webpage = self._download_webpage(url, video_id)
render_data = self._search_json(
r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
if not render_data:
# TODO: Run verification challenge code to generate signature cookies # TODO: Run verification challenge code to generate signature cookies
cookies = self._get_cookies(self._WEBPAGE_HOST)
expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
raise ExtractorError( raise ExtractorError(
'Fresh cookies (not necessarily logged in) are needed', expected=expected) 'Fresh cookies (not necessarily logged in) are needed',
expected=not self._get_cookies(self._WEBPAGE_HOST).get('s_v_web_id'))
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id) return self._parse_aweme_video_app(detail)
class TikTokVMIE(InfoExtractor): class TikTokVMIE(InfoExtractor):

View File

@ -100,9 +100,13 @@ class TwitterBaseIE(InfoExtractor):
if not variant_url: if not variant_url:
return [], {} return [], {}
elif '.m3u8' in variant_url: elif '.m3u8' in variant_url:
return self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
variant_url, video_id, 'mp4', 'm3u8_native', variant_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
return fmts, subs
else: else:
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = { f = {
@ -471,6 +475,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
'channel_id': '549749560',
'uploader': 'FREE THE NIPPLE', 'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple', 'uploader_id': 'freethenipple',
'duration': 12.922, 'duration': 12.922,
@ -484,6 +489,7 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 18, 'age_limit': 18,
'_old_archive_ids': ['twitter 643211948184596480'], '_old_archive_ids': ['twitter 643211948184596480'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42', 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@ -506,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': r're:Star Wars.*A new beginning is coming December 18.*', 'title': r're:Star Wars.*A new beginning is coming December 18.*',
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'channel_id': '20106852',
'uploader_id': 'starwars', 'uploader_id': 'starwars',
'uploader': r're:Star Wars.*', 'uploader': r're:Star Wars.*',
'timestamp': 1447395772, 'timestamp': 1447395772,
@ -551,6 +558,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel', 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'channel_id': '1383165541',
'uploader': 'jaydin donte geer', 'uploader': 'jaydin donte geer',
'uploader_id': 'jaydingeer', 'uploader_id': 'jaydingeer',
'duration': 30.0, 'duration': 30.0,
@ -591,6 +599,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
'channel_id': '701615052',
'uploader_id': 'CaptainAmerica', 'uploader_id': 'CaptainAmerica',
'uploader': 'Captain America', 'uploader': 'Captain America',
'duration': 3.17, 'duration': 3.17,
@ -627,6 +636,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN', 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
'channel_id': '2526757026',
'uploader': 'عالم الأخبار', 'uploader': 'عالم الأخبار',
'uploader_id': 'news_al3alm', 'uploader_id': 'news_al3alm',
'duration': 277.4, 'duration': 277.4,
@ -651,6 +661,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
'channel_id': '2319432498',
'uploader': 'Préfet de Guadeloupe', 'uploader': 'Préfet de Guadeloupe',
'uploader_id': 'Prefet971', 'uploader_id': 'Prefet971',
'duration': 47.48, 'duration': 47.48,
@ -677,6 +688,7 @@ class TwitterIE(TwitterBaseIE):
'title': 're:.*?Shep is on a roll today.*?', 'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
'channel_id': '255036353',
'uploader': 'Lis Power', 'uploader': 'Lis Power',
'uploader_id': 'LisPower1', 'uploader_id': 'LisPower1',
'duration': 111.278, 'duration': 111.278,
@ -741,6 +753,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba', 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
'channel_id': '18552281',
'uploader': 'Brooklyn Nets', 'uploader': 'Brooklyn Nets',
'uploader_id': 'BrooklynNets', 'uploader_id': 'BrooklynNets',
'duration': 324.484, 'duration': 324.484,
@ -763,10 +776,11 @@ class TwitterIE(TwitterBaseIE):
'id': '1577855447914409984', 'id': '1577855447914409984',
'display_id': '1577855540407197696', 'display_id': '1577855540407197696',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:9d198efb93557b8f8d5b78c480407214', 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
'description': 'md5:b9c3699335447391d11753ab21c70a74', 'description': 'md5:b9c3699335447391d11753ab21c70a74',
'upload_date': '20221006', 'upload_date': '20221006',
'uploader': 'oshtru', 'channel_id': '143077138',
'uploader': 'Oshtru',
'uploader_id': 'oshtru', 'uploader_id': 'oshtru',
'uploader_url': 'https://twitter.com/oshtru', 'uploader_url': 'https://twitter.com/oshtru',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
@ -784,9 +798,10 @@ class TwitterIE(TwitterBaseIE):
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464', 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': { 'info_dict': {
'id': '1577719286659006464', 'id': '1577719286659006464',
'title': 'Ultima - Test', 'title': 'Ultima Reload - Test',
'description': 'Test https://t.co/Y3KEZD7Dad', 'description': 'Test https://t.co/Y3KEZD7Dad',
'uploader': 'Ultima', 'channel_id': '168922496',
'uploader': 'Ultima Reload',
'uploader_id': 'UltimaShadowX', 'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX', 'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005', 'upload_date': '20221005',
@ -808,6 +823,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9', 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:95aea692fda36a12081b9629b02daa92', 'description': 'md5:95aea692fda36a12081b9629b02daa92',
'channel_id': '1094109584',
'uploader': 'Max Olson', 'uploader': 'Max Olson',
'uploader_id': 'MesoMax919', 'uploader_id': 'MesoMax919',
'uploader_url': 'https://twitter.com/MesoMax919', 'uploader_url': 'https://twitter.com/MesoMax919',
@ -830,6 +846,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'description': str, 'description': str,
'channel_id': '1217167793541480450',
'uploader': str, 'uploader': str,
'uploader_id': 'Rizdraws', 'uploader_id': 'Rizdraws',
'uploader_url': 'https://twitter.com/Rizdraws', 'uploader_url': 'https://twitter.com/Rizdraws',
@ -840,7 +857,8 @@ class TwitterIE(TwitterBaseIE):
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
'age_limit': 18, 'age_limit': 18,
'tags': [] 'tags': [],
'_old_archive_ids': ['twitter 1575199173472927762'],
}, },
'params': {'skip_download': 'The media could not be played'}, 'params': {'skip_download': 'The media could not be played'},
'skip': 'Requires authentication', 'skip': 'Requires authentication',
@ -852,6 +870,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1395079556562706435', 'id': '1395079556562706435',
'title': str, 'title': str,
'tags': [], 'tags': [],
'channel_id': '21539378',
'uploader': str, 'uploader': str,
'like_count': int, 'like_count': int,
'upload_date': '20210519', 'upload_date': '20210519',
@ -869,6 +888,7 @@ class TwitterIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '1578353380363501568', 'id': '1578353380363501568',
'title': str, 'title': str,
'channel_id': '2195866214',
'uploader_id': 'DavidToons_', 'uploader_id': 'DavidToons_',
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
@ -888,6 +908,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1578401165338976258', 'id': '1578401165338976258',
'title': str, 'title': str,
'description': 'md5:659a6b517a034b4cee5d795381a2dc41', 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
'channel_id': '19338359',
'uploader': str, 'uploader': str,
'uploader_id': 'primevideouk', 'uploader_id': 'primevideouk',
'timestamp': 1665155137, 'timestamp': 1665155137,
@ -929,6 +950,7 @@ class TwitterIE(TwitterBaseIE):
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'comment_count': int, 'comment_count': int,
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'channel_id': '80082014',
'repost_count': int, 'repost_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'upload_date': '20221208', 'upload_date': '20221208',
@ -946,6 +968,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1', 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1670459604.0, 'timestamp': 1670459604.0,
'channel_id': '80082014',
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'uploader': 'Jocelyn Laidlaw', 'uploader': 'Jocelyn Laidlaw',
'repost_count': int, 'repost_count': int,
@ -972,6 +995,7 @@ class TwitterIE(TwitterBaseIE):
'title': '뽀 - 아 최우제 이동속도 봐', 'title': '뽀 - 아 최우제 이동속도 봐',
'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB', 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
'duration': 24.598, 'duration': 24.598,
'channel_id': '1281839411068432384',
'uploader': '', 'uploader': '',
'uploader_id': 's2FAKER', 'uploader_id': 's2FAKER',
'uploader_url': 'https://twitter.com/s2FAKER', 'uploader_url': 'https://twitter.com/s2FAKER',
@ -985,6 +1009,7 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'_old_archive_ids': ['twitter 1621117700482416640'], '_old_archive_ids': ['twitter 1621117700482416640'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2', 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
'info_dict': { 'info_dict': {
@ -992,6 +1017,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1599108751385972737', 'display_id': '1599108751385972737',
'ext': 'mp4', 'ext': 'mp4',
'title': '\u06ea - \U0001F48B', 'title': '\u06ea - \U0001F48B',
'channel_id': '1347791436809441283',
'uploader_url': 'https://twitter.com/hlo_again', 'uploader_url': 'https://twitter.com/hlo_again',
'like_count': int, 'like_count': int,
'uploader_id': 'hlo_again', 'uploader_id': 'hlo_again',
@ -1014,6 +1040,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1600009362759733248', 'id': '1600009362759733248',
'display_id': '1600009574919962625', 'display_id': '1600009574919962625',
'ext': 'mp4', 'ext': 'mp4',
'channel_id': '211814412',
'uploader_url': 'https://twitter.com/MunTheShinobi', 'uploader_url': 'https://twitter.com/MunTheShinobi',
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml', 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig', 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
@ -1061,6 +1088,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1084,6 +1112,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1117,7 +1146,7 @@ class TwitterIE(TwitterBaseIE):
}, },
'add_ie': ['TwitterBroadcast'], 'add_ie': ['TwitterBroadcast'],
}, { }, {
# Animated gif and quote tweet video, with syndication API # Animated gif and quote tweet video
'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950', 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
'playlist_mincount': 2, 'playlist_mincount': 2,
'info_dict': { 'info_dict': {
@ -1125,6 +1154,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'BAKOON - https://t.co/zom968d0a0', 'title': 'BAKOON - https://t.co/zom968d0a0',
'description': 'https://t.co/zom968d0a0', 'description': 'https://t.co/zom968d0a0',
'tags': [], 'tags': [],
'channel_id': '1263540390',
'uploader': 'BAKOON', 'uploader': 'BAKOON',
'uploader_id': 'BAKKOOONN', 'uploader_id': 'BAKKOOONN',
'uploader_url': 'https://twitter.com/BAKKOOONN', 'uploader_url': 'https://twitter.com/BAKKOOONN',
@ -1132,19 +1162,21 @@ class TwitterIE(TwitterBaseIE):
'timestamp': 1693254077.0, 'timestamp': 1693254077.0,
'upload_date': '20230828', 'upload_date': '20230828',
'like_count': int, 'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}}, 'skip': 'Requires authentication',
'expected_warnings': ['Not all metadata'],
}, { }, {
# "stale tweet" with typename "TweetWithVisibilityResults" # "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154', 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '62b1e11cdc2cdd0e527f83adb081f536', 'md5': '511377ff8dfa7545307084dca4dce319',
'info_dict': { 'info_dict': {
'id': '1724883339285544960', 'id': '1724883339285544960',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c', 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164', 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
'display_id': '1724884212803834154', 'display_id': '1724884212803834154',
'channel_id': '337808606',
'uploader': 'Robert F. Kennedy Jr', 'uploader': 'Robert F. Kennedy Jr',
'uploader_id': 'RobertKennedyJr', 'uploader_id': 'RobertKennedyJr',
'uploader_url': 'https://twitter.com/RobertKennedyJr', 'uploader_url': 'https://twitter.com/RobertKennedyJr',
@ -1386,6 +1418,7 @@ class TwitterIE(TwitterBaseIE):
'description': description, 'description': description,
'uploader': uploader, 'uploader': uploader,
'timestamp': unified_timestamp(status.get('created_at')), 'timestamp': unified_timestamp(status.get('created_at')),
'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'), 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
'like_count': int_or_none(status.get('favorite_count')), 'like_count': int_or_none(status.get('favorite_count')),

View File

@ -48,17 +48,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
return url, data, headers return url, data, headers
def _perform_login(self, username, password): def _perform_login(self, username, password):
webpage = self._download_webpage( viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
self._LOGIN_URL, None, 'Downloading login page')
token, vuid = self._extract_xsrft_and_vuid(webpage)
data = { data = {
'action': 'login', 'action': 'login',
'email': username, 'email': username,
'password': password, 'password': password,
'service': 'vimeo', 'service': 'vimeo',
'token': token, 'token': viewer['xsrft'],
} }
self._set_vimeo_cookie('vuid', vuid) self._set_vimeo_cookie('vuid', viewer['vuid'])
try: try:
self._download_webpage( self._download_webpage(
self._LOGIN_URL, None, 'Logging in', self._LOGIN_URL, None, 'Logging in',

View File

@ -3640,15 +3640,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return orderedSet(requested_clients) return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
# YouTube may return a different video player response than expected.
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
return pr_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None initial_pr = None
if webpage: if webpage:
initial_pr = self._search_json( initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
prs = []
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
prs.append({**initial_pr, 'streamingData': None})
all_clients = set(clients) all_clients = set(clients)
clients = clients[::-1] clients = clients[::-1]
prs = []
def append_client(*client_names): def append_client(*client_names):
""" Append the first client name that exists but not already used """ """ Append the first client name that exists but not already used """
@ -3660,18 +3673,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
all_clients.add(actual_client) all_clients.add(actual_client)
return return
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# stripped out even if not requested by the user
# See: https://github.com/yt-dlp/yt-dlp/issues/501
if initial_pr:
pr = dict(initial_pr)
pr['streamingData'] = None
prs.append(pr)
last_error = None
tried_iframe_fallback = False tried_iframe_fallback = False
player_url = None player_url = None
skipped_clients = {}
while clients: while clients:
client, base_client, variant = _split_innertube_client(clients.pop()) client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {} player_ytcfg = master_ytcfg if client == 'web' else {}
@ -3692,26 +3696,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e: except ExtractorError as e:
if last_error: self.report_warning(e)
self.report_warning(last_error)
last_error = e
continue continue
if pr: if pr_id := self._invalid_player_response(pr, video_id):
# YouTube may return a different video player response than expected. skipped_clients[client] = pr_id
# See: https://github.com/TeamNewPipe/NewPipe/issues/8713 elif pr:
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId')) # Save client name for introspection later
if pr_video_id and pr_video_id != video_id: name = short_client_name(client)
self.report_warning( sd = traverse_obj(pr, ('streamingData', {dict})) or {}
f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message()) sd[STREAMING_DATA_CLIENT_NAME] = name
else: for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
# Save client name for introspection later f[STREAMING_DATA_CLIENT_NAME] = name
name = short_client_name(client) prs.append(pr)
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
sd[STREAMING_DATA_CLIENT_NAME] = name
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = name
prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
@ -3722,10 +3719,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif not variant: elif not variant:
append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
if last_error: if skipped_clients:
if not len(prs): self.report_warning(
raise last_error f'Skipping player responses from {"/".join(skipped_clients)} clients '
self.report_warning(last_error) f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
if not prs:
raise ExtractorError(
'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
elif not prs:
raise ExtractorError('Failed to extract any player response')
return prs, player_url return prs, player_url
def _needs_live_processing(self, live_status, duration): def _needs_live_processing(self, live_status, duration):

View File

@ -68,6 +68,7 @@ class RequestDirector:
def close(self): def close(self):
for handler in self.handlers.values(): for handler in self.handlers.values():
handler.close() handler.close()
self.handlers = {}
def add_handler(self, handler: RequestHandler): def add_handler(self, handler: RequestHandler):
"""Add a handler. If a handler of the same RH_KEY exists, it will overwrite it""" """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""

View File

@ -196,9 +196,12 @@ class _YoutubeDLOptionParser(optparse.OptionParser):
raise raise
return self.check_values(self.values, self.largs) return self.check_values(self.values, self.largs)
def error(self, msg): def _generate_error_message(self, msg):
msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n' msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
raise optparse.OptParseError(f'{self.get_usage()}\n{msg}' if self.usage else msg) return f'{self.get_usage()}\n{msg}' if self.usage else msg
def error(self, msg):
raise optparse.OptParseError(self._generate_error_message(msg))
def _get_args(self, args): def _get_args(self, args):
return sys.argv[1:] if args is None else list(args) return sys.argv[1:] if args is None else list(args)

View File

@ -86,11 +86,14 @@ class PluginFinder(importlib.abc.MetaPathFinder):
parts = Path(*fullname.split('.')) parts = Path(*fullname.split('.'))
for path in orderedSet(candidate_locations, lazy=True): for path in orderedSet(candidate_locations, lazy=True):
candidate = path / parts candidate = path / parts
if candidate.is_dir(): try:
yield candidate if candidate.is_dir():
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate yield candidate
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate
except PermissionError as e:
write_string(f'Permission error while accessing modules in "{e.filename}"\n')
def find_spec(self, fullname, path=None, target=None): def find_spec(self, fullname, path=None, target=None):
if fullname not in self.packages: if fullname not in self.packages: