1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-10 17:00:12 +01:00

Merge branch 'yt-dlp:master' into cleanup/2024-01

This commit is contained in:
bashonly 2024-02-24 13:13:09 -06:00
commit 714d11bff3
No known key found for this signature in database
GPG Key ID: 783F096F253D15B0
18 changed files with 368 additions and 145 deletions

View File

@ -164,7 +164,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
yt-dlp yt-dlp
yt-dlp.tar.gz yt-dlp.tar.gz
@ -227,7 +227,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-linux_${{ matrix.architecture }} name: build-bin-linux_${{ matrix.architecture }}
path: | # run-on-arch-action designates armv7l as armv7 path: | # run-on-arch-action designates armv7l as armv7
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
compression-level: 0 compression-level: 0
@ -271,7 +271,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos dist/yt-dlp_macos
dist/yt-dlp_macos.zip dist/yt-dlp_macos.zip
@ -324,7 +324,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos_legacy dist/yt-dlp_macos_legacy
compression-level: 0 compression-level: 0
@ -373,7 +373,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp.exe dist/yt-dlp.exe
dist/yt-dlp_min.exe dist/yt-dlp_min.exe
@ -421,7 +421,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_x86.exe dist/yt-dlp_x86.exe
compression-level: 0 compression-level: 0
@ -441,7 +441,7 @@ jobs:
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
path: artifact path: artifact
pattern: build-* pattern: build-bin-*
merge-multiple: true merge-multiple: true
- name: Make SHA2-SUMS files - name: Make SHA2-SUMS files
@ -484,3 +484,4 @@ jobs:
_update_spec _update_spec
SHA*SUMS* SHA*SUMS*
compression-level: 0 compression-level: 0
overwrite: true

View File

@ -1311,7 +1311,8 @@ The available fields are:
- `display_id` (string): An alternative identifier for the video - `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader - `uploader` (string): Full name of the video uploader
- `license` (string): License name the video is licensed under - `license` (string): License name the video is licensed under
- `creator` (string): The creator of the video - `creators` (list): The creators of the video
- `creator` (string): The creators of the video; comma-separated
- `timestamp` (numeric): UNIX timestamp of the moment the video became available - `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date in UTC (YYYYMMDD) - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
@ -1385,11 +1386,16 @@ Available for the media that is a track or a part of a music album:
- `track` (string): Title of the track - `track` (string): Title of the track
- `track_number` (numeric): Number of the track within an album or a disc - `track_number` (numeric): Number of the track within an album or a disc
- `track_id` (string): Id of the track - `track_id` (string): Id of the track
- `artist` (string): Artist(s) of the track - `artists` (list): Artist(s) of the track
- `genre` (string): Genre(s) of the track - `artist` (string): Artist(s) of the track; comma-separated
- `genres` (list): Genre(s) of the track
- `genre` (string): Genre(s) of the track; comma-separated
- `composers` (list): Composer(s) of the piece
- `composer` (string): Composer(s) of the piece; comma-separated
- `album` (string): Title of the album the track belongs to - `album` (string): Title of the album the track belongs to
- `album_type` (string): Type of the album - `album_type` (string): Type of the album
- `album_artist` (string): List of all artists appeared on the album - `album_artists` (list): All artists appeared on the album
- `album_artist` (string): All artists who appeared on the album; comma-separated
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters: Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@ -1767,10 +1773,11 @@ Metadata fields | From
`description`, `synopsis` | `description` `description`, `synopsis` | `description`
`purl`, `comment` | `webpage_url` `purl`, `comment` | `webpage_url`
`track` | `track_number` `track` | `track_number`
`artist` | `artist`, `creator`, `uploader` or `uploader_id` `artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
`genre` | `genre` `composer` | `composer` or `composers`
`genre` | `genre` or `genres`
`album` | `album` `album` | `album`
`album_artist` | `album_artist` `album_artist` | `album_artist` or `album_artists`
`disc` | `disc_number` `disc` | `disc_number`
`show` | `series` `show` | `series`
`season_number` | `season_number` `season_number` | `season_number`

View File

@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
if test_info_dict.get('display_id') == test_info_dict.get('id'): if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id') test_info_dict.pop('display_id')
# Remove deprecated fields
for old in YoutubeDL._deprecated_multivalue_fields.keys():
test_info_dict.pop(old, None)
# release_year may be generated from release_date # release_year may be generated from release_date
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])): if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
test_info_dict.pop('release_year') test_info_dict.pop('release_year')

View File

@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
def get_videos(filter_=None): def get_videos(filter_=None):
ydl = YDL({'match_filter': filter_, 'simulate': True}) ydl = YDL({'match_filter': filter_, 'simulate': True})
for v in videos: for v in videos:
ydl.process_ie_result(v, download=True) ydl.process_ie_result(v.copy(), download=True)
return [v['id'] for v in ydl.downloaded_info_dicts] return [v['id'] for v in ydl.downloaded_info_dicts]
res = get_videos() res = get_videos()

View File

@ -580,6 +580,13 @@ class YoutubeDL:
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
} }
# Maps each deprecated single-string field name (values joined with ', ')
# to the name of the list-valued field that replaces it
_deprecated_multivalue_fields = {
'album_artist': 'album_artists',
'artist': 'artists',
'composer': 'composers',
'creator': 'creators',
'genre': 'genres',
}
_format_selection_exts = { _format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio), 'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@ -2640,6 +2647,14 @@ class YoutubeDL:
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
for old_key, new_key in self._deprecated_multivalue_fields.items():
if new_key in info_dict and old_key in info_dict:
self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
elif old_value := info_dict.get(old_key):
info_dict[new_key] = old_value.split(', ')
elif new_value := info_dict.get(new_key):
info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
def _raise_pending_errors(self, info): def _raise_pending_errors(self, info):
err = info.pop('__pending_error', None) err = info.pop('__pending_error', None)
if err: if err:

View File

@ -14,7 +14,7 @@ import os
import re import re
import traceback import traceback
from .compat import compat_shlex_quote from .compat import compat_os_name, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
@ -984,7 +984,28 @@ def _real_main(argv=None):
if pre_process: if pre_process:
return ydl._download_retcode return ydl._download_retcode
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) args = sys.argv[1:] if argv is None else argv
ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes
import msvcrt
kernel32 = ctypes.WinDLL('Kernel32')
buffer = (1 * ctypes.wintypes.DWORD)()
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
# If we only have a single process attached, then the executable was double clicked
# When using `pyinstaller` with `--onefile`, two processes get attached
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
if attached_processes == 1 or is_onefile and attached_processes == 2:
print(parser._generate_error_message(
'Do not double-click the executable, instead call it from a command line.\n'
'Please read the README for further information on how to use yt-dlp: '
'https://github.com/yt-dlp/yt-dlp#readme'))
msvcrt.getch()
_exit(2)
parser.error( parser.error(
'You must provide at least one URL.\n' 'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.') 'Type yt-dlp --help to see a list of all options.')

View File

@ -379,7 +379,6 @@ from .clubic import ClubicIE
from .clyp import ClypIE from .clyp import ClypIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnbc import ( from .cnbc import (
CNBCIE,
CNBCVideoIE, CNBCVideoIE,
) )
from .cnn import ( from .cnn import (

View File

@ -1,68 +1,97 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class CNBCIE(InfoExtractor):
_VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://video.cnbc.com/gallery/?video=3000503714',
'info_dict': {
'id': '3000503714',
'ext': 'mp4',
'title': 'Fighting zombies is big business',
'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
'timestamp': 1459332000,
'upload_date': '20160330',
'uploader': 'NBCU-CNBC',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Dead link',
}
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(
'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
{'force_smil_url': True}),
'id': video_id,
}
class CNBCVideoIE(InfoExtractor): class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)' _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', _TESTS = [{
'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
'info_dict': { 'info_dict': {
'id': '7000031301',
'ext': 'mp4', 'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates", 'id': '107344774',
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
'timestamp': 1531958400, 'modified_timestamp': 1702053483,
'upload_date': '20180719', 'timestamp': 1701977810,
'uploader': 'NBCU-CNBC', 'channel': 'News Videos',
'upload_date': '20231207',
'description': 'md5:882c001d85cb43d7579b514307b3e78b',
'release_timestamp': 1701977375,
'modified_date': '20231208',
'release_date': '20231207',
'duration': 65,
'author': 'Sean Conlon',
'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
}, },
'params': { 'expected_warnings': ['Unable to download f4m manifest'],
'skip_download': True, }, {
'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
'info_dict': {
'author': 'Jim Cramer',
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 299.0,
'ext': 'mp4',
'id': '107345451',
'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
'timestamp': 1702080139,
'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
'release_date': '20231208',
'upload_date': '20231209',
'modified_timestamp': 1702080139,
'modified_date': '20231209',
'release_timestamp': 1702073551,
}, },
'skip': 'Dead link', 'expected_warnings': ['Unable to download f4m manifest'],
} }, {
'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
'info_dict': {
'author': 'Jim Cramer',
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 113.0,
'ext': 'mp4',
'id': '107345474',
'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
'timestamp': 1702080535,
'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
'release_timestamp': 1702077347,
'modified_timestamp': 1702080535,
'release_date': '20231208',
'upload_date': '20231209',
'modified_date': '20231209',
},
'expected_warnings': ['Unable to download f4m manifest'],
}]
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = self._match_valid_url(url).groups() display_id = self._match_id(url)
video_id = self._download_json( webpage = self._download_webpage(url, display_id)
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
'query': '''{
page(path: "%s") { player_data = traverse_obj(data, (
vcpsId 'page', 'page', 'layout', ..., 'columns', ..., 'modules',
} lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
}''' % path,
})['data']['page']['vcpsId'] return {
return self.url_result( 'id': display_id,
'http://video.cnbc.com/gallery/?video=%d' % video_id, 'display_id': display_id,
CNBCIE.ie_key()) 'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
**self._search_json_ld(webpage, display_id, fatal=False),
**traverse_obj(player_data, {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'author': ('author', ..., 'name', {str}),
'timestamp': ('datePublished', {parse_iso8601}),
'release_timestamp': ('uploadDate', {parse_iso8601}),
'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
'thumbnail': ('thumbnail', {url_or_none}),
'duration': ('duration', {int_or_none}),
'channel': ('section', 'title', {str}),
}, get_all=False),
}

View File

@ -280,7 +280,7 @@ class InfoExtractor:
description: Full video description. description: Full video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.
license: License name the video is licensed under. license: License name the video is licensed under.
creator: The creator of the video. creators: List of creators of the video.
timestamp: UNIX timestamp of the moment the video was uploaded timestamp: UNIX timestamp of the moment the video was uploaded
upload_date: Video upload date in UTC (YYYYMMDD). upload_date: Video upload date in UTC (YYYYMMDD).
If not explicitly set, calculated from timestamp If not explicitly set, calculated from timestamp
@ -424,16 +424,16 @@ class InfoExtractor:
track_number: Number of the track within an album or a disc, as an integer. track_number: Number of the track within an album or a disc, as an integer.
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii), track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
as a unicode string. as a unicode string.
artist: Artist(s) of the track. artists: List of artists of the track.
genre: Genre(s) of the track. composers: List of composers of the piece.
genres: List of genres of the track.
album: Title of the album the track belongs to. album: Title of the album the track belongs to.
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc). album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
album_artist: List of all artists appeared on the album (e.g. album_artists: List of all artists appeared on the album.
"Ash Borer / Fell Voices" or "Various Artists", useful for splits E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
and compilations). Useful for splits and compilations.
disc_number: Number of the disc or other physical medium the track belongs to, disc_number: Number of the disc or other physical medium the track belongs to,
as an integer. as an integer.
composer: Composer of the piece
The following fields should only be set for clips that should be cut from the original video: The following fields should only be set for clips that should be cut from the original video:
@ -444,6 +444,18 @@ class InfoExtractor:
rows: Number of rows in each storyboard fragment, as an integer rows: Number of rows in each storyboard fragment, as an integer
columns: Number of columns in each storyboard fragment, as an integer columns: Number of columns in each storyboard fragment, as an integer
The following fields are deprecated and should not be set by new code:
composer: Use "composers" instead.
Composer(s) of the piece, comma-separated.
artist: Use "artists" instead.
Artist(s) of the track, comma-separated.
genre: Use "genres" instead.
Genre(s) of the track, comma-separated.
album_artist: Use "album_artists" instead.
All artists who appeared on the album, comma-separated.
creator: Use "creators" instead.
The creator of the video.
Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information. Unless mentioned otherwise, None is equivalent to absence of information.

View File

@ -1,6 +1,7 @@
import itertools import itertools
import json import json
from .art19 import Art19IE
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
class NebulaIE(NebulaBaseIE): class NebulaIE(NebulaBaseIE):
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)' IE_NAME = 'nebula:video'
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast', 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'info_dict': { 'info_dict': {
@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
class NebulaClassIE(NebulaBaseIE): class NebulaClassIE(NebulaBaseIE):
IE_NAME = 'nebula:class' IE_NAME = 'nebula:media'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)' _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14', 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
'info_dict': { 'info_dict': {
@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
'title': 'Photos, Sculpture, and Video', 'title': 'Photos, Sculpture, and Video',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
'info_dict': {
'ext': 'mp3',
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
'series_id': '335e8159-d663-491a-888f-1732285706ac',
'modified_timestamp': 1599091504,
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'series': 'Extremities',
'modified_date': '20200903',
'upload_date': '20200902',
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
'release_timestamp': 1571237958,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'duration': 1546.05714,
'timestamp': 1599085608,
'release_date': '20191016',
},
}, {
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
'info_dict': {
'ext': 'mp3',
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'episode_number': 1,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20230304',
'modified_date': '20230403',
'series': 'The Layover',
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'modified_timestamp': 1680554566,
'duration': 3130.46401,
'release_timestamp': 1677943800,
'title': 'The Layover — Episode 1',
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
'upload_date': '20230303',
'episode': 'Episode 1',
'timestamp': 1677883672,
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
metadata = self._call_api( metadata = self._call_api(
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons', f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
slug, note='Fetching video metadata') slug, note='Fetching class/podcast metadata')
return { content_type = metadata.get('type')
**self._extract_video_metadata(metadata), if content_type == 'lesson':
**self._extract_formats(metadata['id'], slug), return {
} **self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
elif content_type == 'podcast_episode':
episode_url = metadata['episode_url']
if not episode_url and metadata.get('premium'):
self.raise_login_required()
if Art19IE.suitable(episode_url):
return self.url_result(episode_url, Art19IE)
return traverse_obj(metadata, {
'id': ('id', {str}),
'url': ('episode_url', {url_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('published_at', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'channel_id': ('channel_id', {str}),
'chnanel': ('channel_title', {str}),
'thumbnail': ('assets', 'regular', {url_or_none}),
})
raise ExtractorError(f'Unexpected content type {content_type!r}')
class NebulaSubscriptionsIE(NebulaBaseIE): class NebulaSubscriptionsIE(NebulaBaseIE):
IE_NAME = 'nebula:subscriptions' IE_NAME = 'nebula:subscriptions'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)' _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/myshows', 'url': 'https://nebula.tv/myshows',
'playlist_mincount': 1, 'playlist_mincount': 1,
@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
class NebulaChannelIE(NebulaBaseIE): class NebulaChannelIE(NebulaBaseIE):
IE_NAME = 'nebula:channel' IE_NAME = 'nebula:channel'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])' _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/tom-scott-presents-money', 'url': 'https://nebula.tv/tom-scott-presents-money',
'info_dict': { 'info_dict': {
@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
'description': 'md5:6690248223eed044a9f11cd5a24f9742', 'description': 'md5:6690248223eed044a9f11cd5a24f9742',
}, },
'playlist_count': 23, 'playlist_count': 23,
}, {
'url': 'https://nebula.tv/trussissuespodcast',
'info_dict': {
'id': 'trussissuespodcast',
'title': 'The TLDR News Podcast',
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
},
'playlist_mincount': 80,
}] }]
def _generate_playlist_entries(self, collection_id, collection_slug): def _generate_playlist_entries(self, collection_id, collection_slug):
@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}', lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata) {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
def _generate_podcast_entries(self, collection_id, collection_slug):
    """Yield one url_result per podcast episode of a channel, following API pagination.

    Starts at the channel's podcast_episodes listing and keeps requesting the
    URL given in each response's 'next' field until that field is empty.
    Episodes whose 'share_url' is not a valid URL are skipped.
    """
    page_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
    page_num = 0
    while page_url:
        page_num += 1
        page = self._call_api(page_url, collection_slug, note=f'Retrieving podcast page {page_num}')
        shareable = traverse_obj(page, ('results', lambda _, v: url_or_none(v['share_url'])))
        for item in shareable:
            yield self.url_result(item['share_url'], NebulaClassIE)
        # An empty/missing 'next' link marks the last page
        page_url = page.get('next')
def _real_extract(self, url): def _real_extract(self, url):
collection_slug = self._match_id(url) collection_slug = self._match_id(url)
channel = self._call_api( channel = self._call_api(
@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
if channel.get('type') == 'class': if channel.get('type') == 'class':
entries = self._generate_class_entries(channel) entries = self._generate_class_entries(channel)
elif channel.get('type') == 'podcast_channel':
entries = self._generate_podcast_entries(channel['id'], collection_slug)
else: else:
entries = self._generate_playlist_entries(channel['id'], collection_slug) entries = self._generate_playlist_entries(channel['id'], collection_slug)

View File

@ -1,33 +1,38 @@
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NerdCubedFeedIE(InfoExtractor): class NerdCubedFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json' _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json', 'url': 'http://www.nerdcubed.co.uk/',
'info_dict': { 'info_dict': {
'id': 'nerdcubed-feed', 'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed', 'title': 'nerdcubed.co.uk feed',
}, },
'playlist_mincount': 1300, 'playlist_mincount': 5500,
} }
def _extract_video(self, feed_entry):
    """Turn one feed entry into a url_transparent result for its YouTube watch page.

    The entry's 'id' is used to build the watch URL; the remaining metadata
    that passes type validation is forwarded as keyword arguments.
    """
    watch_url = f'https://www.youtube.com/watch?v={feed_entry["id"]}'
    field_map = {
        'id': ('id', {str}),
        'title': ('title', {str}),
        'description': ('description', {str}),
        'timestamp': ('publishedAt', {parse_iso8601}),
        'channel': ('source', 'name', {str}),
        'channel_id': ('source', 'id', {str}),
        'channel_url': ('source', 'url', {str}),
        'thumbnail': ('thumbnail', 'source', {url_or_none}),
    }
    overrides = traverse_obj(feed_entry, field_map)
    return self.url_result(watch_url, YoutubeIE, **overrides, url_transparent=True)
def _real_extract(self, url): def _real_extract(self, url):
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed') video_id = 'nerdcubed-feed'
feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
entries = [{ return self.playlist_result(
'_type': 'url', map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
'title': feed_entry['title'], video_id, 'nerdcubed.co.uk feed')
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
} for feed_entry in feed]
return {
'_type': 'playlist',
'title': 'nerdcubed.co.uk feed',
'id': 'nerdcubed-feed',
'entries': entries,
}

View File

@ -12,6 +12,8 @@ from ..compat import compat_str
class OpenRecBaseIE(InfoExtractor): class OpenRecBaseIE(InfoExtractor):
_M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
def _extract_pagestore(self, webpage, video_id): def _extract_pagestore(self, webpage, video_id):
return self._parse_json( return self._parse_json(
self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id) self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
@ -21,7 +23,7 @@ class OpenRecBaseIE(InfoExtractor):
if not m3u8_url: if not m3u8_url:
continue continue
yield from self._extract_m3u8_formats( yield from self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_id=name) m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
def _extract_movie(self, webpage, video_id, name, is_live): def _extract_movie(self, webpage, video_id, name, is_live):
window_stores = self._extract_pagestore(webpage, video_id) window_stores = self._extract_pagestore(webpage, video_id)
@ -60,6 +62,7 @@ class OpenRecBaseIE(InfoExtractor):
'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')), 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')), 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
'is_live': is_live, 'is_live': is_live,
'http_headers': self._M3U8_HEADERS,
} }
@ -110,7 +113,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
raise ExtractorError('Cannot extract title') raise ExtractorError('Cannot extract title')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4') capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
return { return {
'id': video_id, 'id': video_id,
@ -121,6 +124,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str), 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str), 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
'upload_date': unified_strdate(capture_data.get('createdAt')), 'upload_date': unified_strdate(capture_data.get('createdAt')),
'http_headers': self._M3U8_HEADERS,
} }

View File

@ -9,7 +9,6 @@ from ..utils import (
get_element_html_by_class, get_element_html_by_class,
get_elements_by_class, get_elements_by_class,
int_or_none, int_or_none,
join_nonempty,
parse_count, parse_count,
parse_duration, parse_duration,
unescapeHTML, unescapeHTML,
@ -57,7 +56,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'timestamp': 1640131200, 'timestamp': 1640131200,
'description': '', 'description': '',
'creator': 'WildeerStudio', 'creators': ['WildeerStudio'],
'upload_date': '20211222', 'upload_date': '20211222',
'uploader': 'CerZule', 'uploader': 'CerZule',
'uploader_url': 'https://rule34video.com/members/36281/', 'uploader_url': 'https://rule34video.com/members/36281/',
@ -81,13 +80,13 @@ class Rule34VideoIE(InfoExtractor):
'quality': quality, 'quality': quality,
}) })
categories, creator, uploader, uploader_url = [None] * 4 categories, creators, uploader, uploader_url = [None] * 4
for col in get_elements_by_class('col', webpage): for col in get_elements_by_class('col', webpage):
label = clean_html(get_element_by_class('label', col)) label = clean_html(get_element_by_class('label', col))
if label == 'Categories:': if label == 'Categories:':
categories = list(map(clean_html, get_elements_by_class('item', col))) categories = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Artist:': elif label == 'Artist:':
creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ') creators = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Uploaded By:': elif label == 'Uploaded By:':
uploader = clean_html(get_element_by_class('name', col)) uploader = clean_html(get_element_by_class('name', col))
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href') uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
@ -115,7 +114,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int_or_none(self._search_regex( 'comment_count': int_or_none(self._search_regex(
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)), r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
'age_limit': 18, 'age_limit': 18,
'creator': creator, 'creators': creators,
'uploader': uploader, 'uploader': uploader,
'uploader_url': uploader_url, 'uploader_url': uploader_url,
'categories': categories, 'categories': categories,

View File

@ -100,9 +100,13 @@ class TwitterBaseIE(InfoExtractor):
if not variant_url: if not variant_url:
return [], {} return [], {}
elif '.m3u8' in variant_url: elif '.m3u8' in variant_url:
return self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
variant_url, video_id, 'mp4', 'm3u8_native', variant_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
return fmts, subs
else: else:
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = { f = {
@ -471,6 +475,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!', 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ', 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
'channel_id': '549749560',
'uploader': 'FREE THE NIPPLE', 'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple', 'uploader_id': 'freethenipple',
'duration': 12.922, 'duration': 12.922,
@ -484,6 +489,7 @@ class TwitterIE(TwitterBaseIE):
'age_limit': 18, 'age_limit': 18,
'_old_archive_ids': ['twitter 643211948184596480'], '_old_archive_ids': ['twitter 643211948184596480'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1', 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
'md5': 'f36dcd5fb92bf7057f155e7d927eeb42', 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
@ -506,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': r're:Star Wars.*A new beginning is coming December 18.*', 'title': r're:Star Wars.*A new beginning is coming December 18.*',
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ', 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'channel_id': '20106852',
'uploader_id': 'starwars', 'uploader_id': 'starwars',
'uploader': r're:Star Wars.*', 'uploader': r're:Star Wars.*',
'timestamp': 1447395772, 'timestamp': 1447395772,
@ -551,6 +558,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel', 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ', 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'channel_id': '1383165541',
'uploader': 'jaydin donte geer', 'uploader': 'jaydin donte geer',
'uploader_id': 'jaydingeer', 'uploader_id': 'jaydingeer',
'duration': 30.0, 'duration': 30.0,
@ -591,6 +599,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.', 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI', 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
'channel_id': '701615052',
'uploader_id': 'CaptainAmerica', 'uploader_id': 'CaptainAmerica',
'uploader': 'Captain America', 'uploader': 'Captain America',
'duration': 3.17, 'duration': 3.17,
@ -627,6 +636,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة', 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN', 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
'channel_id': '2526757026',
'uploader': 'عالم الأخبار', 'uploader': 'عالم الأخبار',
'uploader_id': 'news_al3alm', 'uploader_id': 'news_al3alm',
'duration': 277.4, 'duration': 277.4,
@ -651,6 +661,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.', 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo', 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
'channel_id': '2319432498',
'uploader': 'Préfet de Guadeloupe', 'uploader': 'Préfet de Guadeloupe',
'uploader_id': 'Prefet971', 'uploader_id': 'Prefet971',
'duration': 47.48, 'duration': 47.48,
@ -677,6 +688,7 @@ class TwitterIE(TwitterBaseIE):
'title': 're:.*?Shep is on a roll today.*?', 'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09', 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
'channel_id': '255036353',
'uploader': 'Lis Power', 'uploader': 'Lis Power',
'uploader_id': 'LisPower1', 'uploader_id': 'LisPower1',
'duration': 111.278, 'duration': 111.278,
@ -741,6 +753,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba', 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:71ead15ec44cee55071547d6447c6a3e', 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
'channel_id': '18552281',
'uploader': 'Brooklyn Nets', 'uploader': 'Brooklyn Nets',
'uploader_id': 'BrooklynNets', 'uploader_id': 'BrooklynNets',
'duration': 324.484, 'duration': 324.484,
@ -763,10 +776,11 @@ class TwitterIE(TwitterBaseIE):
'id': '1577855447914409984', 'id': '1577855447914409984',
'display_id': '1577855540407197696', 'display_id': '1577855540407197696',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:9d198efb93557b8f8d5b78c480407214', 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
'description': 'md5:b9c3699335447391d11753ab21c70a74', 'description': 'md5:b9c3699335447391d11753ab21c70a74',
'upload_date': '20221006', 'upload_date': '20221006',
'uploader': 'oshtru', 'channel_id': '143077138',
'uploader': 'Oshtru',
'uploader_id': 'oshtru', 'uploader_id': 'oshtru',
'uploader_url': 'https://twitter.com/oshtru', 'uploader_url': 'https://twitter.com/oshtru',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
@ -784,9 +798,10 @@ class TwitterIE(TwitterBaseIE):
'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464', 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
'info_dict': { 'info_dict': {
'id': '1577719286659006464', 'id': '1577719286659006464',
'title': 'Ultima - Test', 'title': 'Ultima Reload - Test',
'description': 'Test https://t.co/Y3KEZD7Dad', 'description': 'Test https://t.co/Y3KEZD7Dad',
'uploader': 'Ultima', 'channel_id': '168922496',
'uploader': 'Ultima Reload',
'uploader_id': 'UltimaShadowX', 'uploader_id': 'UltimaShadowX',
'uploader_url': 'https://twitter.com/UltimaShadowX', 'uploader_url': 'https://twitter.com/UltimaShadowX',
'upload_date': '20221005', 'upload_date': '20221005',
@ -808,6 +823,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9', 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'description': 'md5:95aea692fda36a12081b9629b02daa92', 'description': 'md5:95aea692fda36a12081b9629b02daa92',
'channel_id': '1094109584',
'uploader': 'Max Olson', 'uploader': 'Max Olson',
'uploader_id': 'MesoMax919', 'uploader_id': 'MesoMax919',
'uploader_url': 'https://twitter.com/MesoMax919', 'uploader_url': 'https://twitter.com/MesoMax919',
@ -830,6 +846,7 @@ class TwitterIE(TwitterBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'description': str, 'description': str,
'channel_id': '1217167793541480450',
'uploader': str, 'uploader': str,
'uploader_id': 'Rizdraws', 'uploader_id': 'Rizdraws',
'uploader_url': 'https://twitter.com/Rizdraws', 'uploader_url': 'https://twitter.com/Rizdraws',
@ -840,7 +857,8 @@ class TwitterIE(TwitterBaseIE):
'repost_count': int, 'repost_count': int,
'comment_count': int, 'comment_count': int,
'age_limit': 18, 'age_limit': 18,
'tags': [] 'tags': [],
'_old_archive_ids': ['twitter 1575199173472927762'],
}, },
'params': {'skip_download': 'The media could not be played'}, 'params': {'skip_download': 'The media could not be played'},
'skip': 'Requires authentication', 'skip': 'Requires authentication',
@ -852,6 +870,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1395079556562706435', 'id': '1395079556562706435',
'title': str, 'title': str,
'tags': [], 'tags': [],
'channel_id': '21539378',
'uploader': str, 'uploader': str,
'like_count': int, 'like_count': int,
'upload_date': '20210519', 'upload_date': '20210519',
@ -869,6 +888,7 @@ class TwitterIE(TwitterBaseIE):
'info_dict': { 'info_dict': {
'id': '1578353380363501568', 'id': '1578353380363501568',
'title': str, 'title': str,
'channel_id': '2195866214',
'uploader_id': 'DavidToons_', 'uploader_id': 'DavidToons_',
'repost_count': int, 'repost_count': int,
'like_count': int, 'like_count': int,
@ -888,6 +908,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1578401165338976258', 'id': '1578401165338976258',
'title': str, 'title': str,
'description': 'md5:659a6b517a034b4cee5d795381a2dc41', 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
'channel_id': '19338359',
'uploader': str, 'uploader': str,
'uploader_id': 'primevideouk', 'uploader_id': 'primevideouk',
'timestamp': 1665155137, 'timestamp': 1665155137,
@ -929,6 +950,7 @@ class TwitterIE(TwitterBaseIE):
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c', 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
'comment_count': int, 'comment_count': int,
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'channel_id': '80082014',
'repost_count': int, 'repost_count': int,
'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'], 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
'upload_date': '20221208', 'upload_date': '20221208',
@ -946,6 +968,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1', 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1670459604.0, 'timestamp': 1670459604.0,
'channel_id': '80082014',
'uploader_id': 'CTVJLaidlaw', 'uploader_id': 'CTVJLaidlaw',
'uploader': 'Jocelyn Laidlaw', 'uploader': 'Jocelyn Laidlaw',
'repost_count': int, 'repost_count': int,
@ -972,6 +995,7 @@ class TwitterIE(TwitterBaseIE):
'title': '뽀 - 아 최우제 이동속도 봐', 'title': '뽀 - 아 최우제 이동속도 봐',
'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB', 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
'duration': 24.598, 'duration': 24.598,
'channel_id': '1281839411068432384',
'uploader': '', 'uploader': '',
'uploader_id': 's2FAKER', 'uploader_id': 's2FAKER',
'uploader_url': 'https://twitter.com/s2FAKER', 'uploader_url': 'https://twitter.com/s2FAKER',
@ -985,6 +1009,7 @@ class TwitterIE(TwitterBaseIE):
'comment_count': int, 'comment_count': int,
'_old_archive_ids': ['twitter 1621117700482416640'], '_old_archive_ids': ['twitter 1621117700482416640'],
}, },
'skip': 'Requires authentication',
}, { }, {
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2', 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
'info_dict': { 'info_dict': {
@ -992,6 +1017,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1599108751385972737', 'display_id': '1599108751385972737',
'ext': 'mp4', 'ext': 'mp4',
'title': '\u06ea - \U0001F48B', 'title': '\u06ea - \U0001F48B',
'channel_id': '1347791436809441283',
'uploader_url': 'https://twitter.com/hlo_again', 'uploader_url': 'https://twitter.com/hlo_again',
'like_count': int, 'like_count': int,
'uploader_id': 'hlo_again', 'uploader_id': 'hlo_again',
@ -1014,6 +1040,7 @@ class TwitterIE(TwitterBaseIE):
'id': '1600009362759733248', 'id': '1600009362759733248',
'display_id': '1600009574919962625', 'display_id': '1600009574919962625',
'ext': 'mp4', 'ext': 'mp4',
'channel_id': '211814412',
'uploader_url': 'https://twitter.com/MunTheShinobi', 'uploader_url': 'https://twitter.com/MunTheShinobi',
'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml', 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig', 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
@ -1061,6 +1088,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1084,6 +1112,7 @@ class TwitterIE(TwitterBaseIE):
'display_id': '1695424220702888009', 'display_id': '1695424220702888009',
'title': 'md5:e8daa9527bc2b947121395494f786d9d', 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
'description': 'md5:004f2d37fd58737724ec75bc7e679938', 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
'channel_id': '15212187',
'uploader': 'Benny Johnson', 'uploader': 'Benny Johnson',
'uploader_id': 'bennyjohnson', 'uploader_id': 'bennyjohnson',
'uploader_url': 'https://twitter.com/bennyjohnson', 'uploader_url': 'https://twitter.com/bennyjohnson',
@ -1117,7 +1146,7 @@ class TwitterIE(TwitterBaseIE):
}, },
'add_ie': ['TwitterBroadcast'], 'add_ie': ['TwitterBroadcast'],
}, { }, {
# Animated gif and quote tweet video, with syndication API # Animated gif and quote tweet video
'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950', 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
'playlist_mincount': 2, 'playlist_mincount': 2,
'info_dict': { 'info_dict': {
@ -1125,6 +1154,7 @@ class TwitterIE(TwitterBaseIE):
'title': 'BAKOON - https://t.co/zom968d0a0', 'title': 'BAKOON - https://t.co/zom968d0a0',
'description': 'https://t.co/zom968d0a0', 'description': 'https://t.co/zom968d0a0',
'tags': [], 'tags': [],
'channel_id': '1263540390',
'uploader': 'BAKOON', 'uploader': 'BAKOON',
'uploader_id': 'BAKKOOONN', 'uploader_id': 'BAKKOOONN',
'uploader_url': 'https://twitter.com/BAKKOOONN', 'uploader_url': 'https://twitter.com/BAKKOOONN',
@ -1132,19 +1162,21 @@ class TwitterIE(TwitterBaseIE):
'timestamp': 1693254077.0, 'timestamp': 1693254077.0,
'upload_date': '20230828', 'upload_date': '20230828',
'like_count': int, 'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
'params': {'extractor_args': {'twitter': {'api': ['syndication']}}}, 'skip': 'Requires authentication',
'expected_warnings': ['Not all metadata'],
}, { }, {
# "stale tweet" with typename "TweetWithVisibilityResults" # "stale tweet" with typename "TweetWithVisibilityResults"
'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154', 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
'md5': '62b1e11cdc2cdd0e527f83adb081f536', 'md5': '511377ff8dfa7545307084dca4dce319',
'info_dict': { 'info_dict': {
'id': '1724883339285544960', 'id': '1724883339285544960',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:cc56716f9ed0b368de2ba54c478e493c', 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164', 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
'display_id': '1724884212803834154', 'display_id': '1724884212803834154',
'channel_id': '337808606',
'uploader': 'Robert F. Kennedy Jr', 'uploader': 'Robert F. Kennedy Jr',
'uploader_id': 'RobertKennedyJr', 'uploader_id': 'RobertKennedyJr',
'uploader_url': 'https://twitter.com/RobertKennedyJr', 'uploader_url': 'https://twitter.com/RobertKennedyJr',
@ -1386,6 +1418,7 @@ class TwitterIE(TwitterBaseIE):
'description': description, 'description': description,
'uploader': uploader, 'uploader': uploader,
'timestamp': unified_timestamp(status.get('created_at')), 'timestamp': unified_timestamp(status.get('created_at')),
'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'), 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
'like_count': int_or_none(status.get('favorite_count')), 'like_count': int_or_none(status.get('favorite_count')),

View File

@ -2068,7 +2068,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Voyeur Girl', 'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f', 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312', 'upload_date': '20190312',
'artist': 'Stephen', 'artists': ['Stephen'],
'creators': ['Stephen'],
'track': 'Voyeur Girl', 'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear', 'album': 'it\'s too much love to know my dear',
'release_date': '20190313', 'release_date': '20190313',
@ -2081,7 +2082,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel': 'Stephen', # TODO: should be "Stephen - Topic" 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
'uploader': 'Stephen', 'uploader': 'Stephen',
'availability': 'public', 'availability': 'public',
'creator': 'Stephen',
'duration': 169, 'duration': 169,
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'age_limit': 0, 'age_limit': 0,
@ -4386,7 +4386,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
release_year = release_date[:4] release_year = release_date[:4]
info.update({ info.update({
'album': mobj.group('album'.strip()), 'album': mobj.group('album'.strip()),
'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), 'artists': ([a] if (a := mobj.group('clean_artist'))
else [a.strip() for a in mobj.group('artist').split('·')]),
'track': mobj.group('track').strip(), 'track': mobj.group('track').strip(),
'release_date': release_date, 'release_date': release_date,
'release_year': int_or_none(release_year), 'release_year': int_or_none(release_year),
@ -4532,7 +4533,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mrr_title == 'Album': if mrr_title == 'Album':
info['album'] = mrr_contents_text info['album'] = mrr_contents_text
elif mrr_title == 'Artist': elif mrr_title == 'Artist':
info['artist'] = mrr_contents_text info['artists'] = [mrr_contents_text] if mrr_contents_text else None
elif mrr_title == 'Song': elif mrr_title == 'Song':
info['track'] = mrr_contents_text info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges'))) owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
@ -4566,7 +4567,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if fmt.get('protocol') == 'm3u8_native': if fmt.get('protocol') == 'm3u8_native':
fmt['__needs_testing'] = True fmt['__needs_testing'] = True
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
v = info.get(s_k) v = info.get(s_k)
if v: if v:
info[d_k] = v info[d_k] = v

View File

@ -196,9 +196,12 @@ class _YoutubeDLOptionParser(optparse.OptionParser):
raise raise
return self.check_values(self.values, self.largs) return self.check_values(self.values, self.largs)
def error(self, msg): def _generate_error_message(self, msg):
msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n' msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
raise optparse.OptParseError(f'{self.get_usage()}\n{msg}' if self.usage else msg) return f'{self.get_usage()}\n{msg}' if self.usage else msg
def error(self, msg):
raise optparse.OptParseError(self._generate_error_message(msg))
def _get_args(self, args): def _get_args(self, args):
return sys.argv[1:] if args is None else list(args) return sys.argv[1:] if args is None else list(args)

View File

@ -86,11 +86,14 @@ class PluginFinder(importlib.abc.MetaPathFinder):
parts = Path(*fullname.split('.')) parts = Path(*fullname.split('.'))
for path in orderedSet(candidate_locations, lazy=True): for path in orderedSet(candidate_locations, lazy=True):
candidate = path / parts candidate = path / parts
if candidate.is_dir(): try:
yield candidate if candidate.is_dir():
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate yield candidate
elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
if parts in dirs_in_zip(path):
yield candidate
except PermissionError as e:
write_string(f'Permission error while accessing modules in "{e.filename}"\n')
def find_spec(self, fullname, path=None, target=None): def find_spec(self, fullname, path=None, target=None):
if fullname not in self.packages: if fullname not in self.packages:

View File

@ -738,9 +738,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
def add(meta_list, info_list=None): def add(meta_list, info_list=None):
value = next(( value = next((
str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list)) info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
if info.get(key) is not None), None) if info.get(key) is not None), None)
if value not in ('', None): if value not in ('', None):
value = ', '.join(map(str, variadic(value)))
value = value.replace('\0', '') # nul character cannot be passed in command line value = value.replace('\0', '') # nul character cannot be passed in command line
metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
@ -754,10 +755,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
add(('description', 'synopsis'), 'description') add(('description', 'synopsis'), 'description')
add(('purl', 'comment'), 'webpage_url') add(('purl', 'comment'), 'webpage_url')
add('track', 'track_number') add('track', 'track_number')
add('artist', ('artist', 'creator', 'uploader', 'uploader_id')) add('artist', ('artist', 'artists', 'creator', 'creators', 'uploader', 'uploader_id'))
add('genre') add('composer', ('composer', 'composers'))
add('genre', ('genre', 'genres'))
add('album') add('album')
add('album_artist') add('album_artist', ('album_artist', 'album_artists'))
add('disc', 'disc_number') add('disc', 'disc_number')
add('show', 'series') add('show', 'series')
add('season_number') add('season_number')