Compare commits

...

4 Commits

Author SHA1 Message Date
bashonly 9bfe0d15bd
Fix 5cc0a8fd2e
Authored by: bashonly
2023-03-23 14:28:31 -05:00
bashonly 8ceb07e870
[extractor/tiktok] Fix mp3 formats (#6615)
Closes #6608
Authored by: bashonly
2023-03-23 18:46:33 +00:00
bashonly 6bdb64e2a2
[extractor/hollywoodreporter] Add extractors (#6614)
Closes #6525
Authored by: bashonly
2023-03-23 18:45:56 +00:00
bashonly 3ae182ad89
[extractor/pgatour] Add extractor (#6613)
Closes #6537
Authored by: bashonly
2023-03-23 18:45:27 +00:00
5 changed files with 159 additions and 2 deletions

View File

@ -719,6 +719,10 @@ from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hollywoodreporter import (
HollywoodReporterIE,
HollywoodReporterPlaylistIE,
)
from .holodex import HolodexIE
from .hotnewhiphop import HotNewHipHopIE
from .hotstar import (
@ -1393,6 +1397,7 @@ from .periscope import (
PeriscopeIE,
PeriscopeUserIE,
)
from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE

View File

@ -2188,7 +2188,6 @@ class GenericIE(InfoExtractor):
def _extra_manifest_info(self, info, manifest_url):
fragment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0]
if fragment_query is not None:
fragment_query = self._configuration_arg('fragment_query', casesense=True)[0]
info['extra_param_to_segment_url'] = (
urllib.parse.urlparse(fragment_query).query or fragment_query
or urllib.parse.urlparse(manifest_url).query or None)

View File

@ -0,0 +1,72 @@
import functools
import re
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import (
ExtractorError,
OnDemandPagedList,
extract_attributes,
get_element_by_class,
get_element_html_by_class,
)
# Extractor for individual hollywoodreporter.com "/video/<slug>" pages.
# It does not extract media itself: it reads the showcase attributes off the
# page's video-card link and hands the result to another extractor.
class HollywoodReporterIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/video/chris-pine-michelle-rodriguez-dungeons-dragons-cast-directors-on-what-it-took-to-make-film-sxsw-2023/',
        'info_dict': {
            'id': 'zH4jZaR5',
            'ext': 'mp4',
            'title': 'md5:a9a1c073770a32f178955997712c4bd9',
            'description': 'The cast and directors of \'Dungeons & Dragons: Honor Among Thieves\' talk about their new film.',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/zH4jZaR5/poster.jpg?width=720',
            'upload_date': '20230312',
            'timestamp': 1678586423,
            'duration': 242.0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        # The URL slug is only used as the display ID for the page download;
        # the real video ID comes from the page markup below.
        page_slug = self._match_id(url)
        page_html = self._download_webpage(url, page_slug)

        # Fall back to '' so extract_attributes still receives a string when
        # the link element is not found.
        link_html = get_element_html_by_class('vlanding-video-card__link', page_html) or ''
        link_attrs = extract_attributes(link_html)
        # Indexed directly: a missing attribute is not handled in this method.
        video_id = link_attrs['data-video-showcase-trigger']
        showcase_type = link_attrs['data-video-showcase-type']

        # Dispatch on the hosting type declared by the page.
        if showcase_type == 'jwplayer':
            return self.url_result(f'jwplatform:{video_id}', JWPlatformIE)
        if showcase_type == 'youtube':
            return self.url_result(video_id, 'Youtube')
        raise ExtractorError(f'Unsupported showcase type "{showcase_type}"')
# Extractor for hollywoodreporter.com "/vcategory/<slug>-<id>" listings.
# The numeric suffix is used as the playlist ID and the slug as its title.
class HollywoodReporterPlaylistIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/vcategory/(?P<slug>[\w-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/vcategory/heat-vision-breakdown-57822/',
        'playlist_mincount': 109,
        'info_dict': {
            'id': '57822',
            'title': 'heat-vision-breakdown',
        }
    }]

    def _fetch_page(self, slug, pl_id, page):
        # Generator: yields one url_result per video link found on a listing page.
        # The incoming index is bumped by one before it is put into the URL
        # (presumably the caller counts pages from zero - confirm against OnDemandPagedList).
        page += 1
        listing_html = self._download_webpage(
            f'https://www.hollywoodreporter.com/vcategory/{slug}-{pl_id}/page/{page}/',
            pl_id, note=f'Downloading playlist page {page}')

        # Only links inside the "video-playlist-river" element are considered;
        # an absent element leaves an empty string and therefore no entries.
        river_html = get_element_by_class('video-playlist-river', listing_html) or ''
        video_urls = re.findall(r'<a[^>]+href="([^"]+)"[^>]+class="c-title__link', river_html)
        yield from (self.url_result(video_url, HollywoodReporterIE) for video_url in video_urls)

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        slug, pl_id = mobj.group('slug', 'id')
        # Pages are fetched through _fetch_page with slug and ID pre-bound;
        # the second argument (15) is presumably the page size - confirm.
        page_fetcher = functools.partial(self._fetch_page, slug, pl_id)
        entries = OnDemandPagedList(page_fetcher, 15)
        return self.playlist_result(entries, pl_id, slug)

View File

@ -0,0 +1,47 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
# Extractor for pgatour.com "/video/<section>/<id>/..." pages.
# It builds a Brightcove player URL from the numeric ID in the page URL and
# delegates to BrightcoveNewIE; the page itself is never downloaded here.
class PGATourIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1',
        'info_dict': {
            'id': '6322447785112',
            'ext': 'mp4',
            'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1',
            'uploader_id': '6116716431001',
            'upload_date': '20230312',
            'timestamp': 1678653136,
            'duration': 20.011,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:7',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday',
        'info_dict': {
            'id': '6322506425112',
            'ext': 'mp4',
            'title': 'Follow THE PLAYERS trophy on Championship Sunday',
            'description': 'md5:4d29e4bdfa03694a0ebfd08950398568',
            'uploader_id': '6082840763001',
            'upload_date': '20230313',
            'timestamp': 1678739835,
            'duration': 123.435,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:8',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        # A leading "T" before the numeric ID (the optional "tc" group) selects
        # a different Brightcove account/player pair.
        # From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js
        if mobj.group('tc'):
            account_id, player_id = '6116716431001', 'Vsd5Umu8r'
        else:
            account_id, player_id = '6082840763001', 'FWIBYMBPj'

        return self.url_result(
            f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
            BrightcoveNewIE)

View File

@ -13,6 +13,7 @@ from ..utils import (
LazyList,
UnsupportedError,
UserNotLive,
determine_ext,
format_field,
get_element_by_id,
get_first,
@ -204,6 +205,16 @@ class TikTokBaseIE(InfoExtractor):
known_resolutions = {}
def mp3_meta(url):
    # Returns the format fields that mark `url` as an audio-only mp3 track,
    # or an empty dict when determine_ext() does not report "mp3" for it.
    if determine_ext(url) != 'mp3':
        return {}
    return {
        'format_note': 'Music track',
        'ext': 'mp3',
        'acodec': 'mp3',
        'vcodec': 'none',
        # Explicit None values so no video dimensions are reported for the track
        'width': None,
        'height': None,
    }
def extract_addr(addr, add_meta={}):
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
if res:
@ -219,7 +230,8 @@ class TikTokBaseIE(InfoExtractor):
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
**add_meta, **parsed_meta,
'format_note': join_nonempty(
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' ')
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
**mp3_meta(url),
} for url in addr.get('url_list') or []]
# Hack: Add direct video links first to prioritize them when removing duplicate formats
@ -553,6 +565,28 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int,
},
'skip': 'This video is unavailable',
}, {
# slideshow audio-only mp3 format
'url': 'https://www.tiktok.com/@_le_cannibale_/video/7139980461132074283',
'info_dict': {
'id': '7139980461132074283',
'ext': 'mp3',
'title': 'TikTok video #7139980461132074283',
'description': '',
'creator': 'Antaura',
'uploader': '_le_cannibale_',
'uploader_id': '6604511138619654149',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'artist': 'nathan !',
'track': 'grahamscott canon',
'upload_date': '20220905',
'timestamp': 1662406249,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
'thumbnail': r're:^https://.+\.webp',
},
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',