mirror of https://github.com/yt-dlp/yt-dlp.git
[ie/tiktok] Fix API extraction (#9548)
Closes #9506. Authored by: bashonly, Grub4K. Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
This commit is contained in:
parent
9c42b7eef5
commit
cb61e20c26
|
@ -1805,9 +1805,12 @@ The following extractors use this feature:
|
|||
* `max_comments`: Maximum number of comments to extract - default is `120`
|
||||
|
||||
#### tiktok
|
||||
* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
|
||||
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
|
||||
* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`
|
||||
* `api_hostname`: Hostname to use for mobile API calls, e.g. `api22-normal-c-alisg.tiktokv.com`
|
||||
* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
|
||||
* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
|
||||
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
|
||||
* `aid`: Default app ID to use with API calls, e.g. `1180`
|
||||
* `app_info`: One or more comma-separated app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
||||
|
||||
#### rokfinchannel
|
||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||
|
|
|
@ -4,6 +4,7 @@ import random
|
|||
import re
|
||||
import string
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
|
@ -30,19 +31,65 @@ from ..utils import (
|
|||
|
||||
|
||||
class TikTokBaseIE(InfoExtractor):
|
||||
_APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
|
||||
_WORKING_APP_VERSION = None
|
||||
_APP_NAME = 'trill'
|
||||
_AID = 1180
|
||||
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
|
||||
_WEBPAGE_HOST = 'https://www.tiktok.com/'
|
||||
QUALITIES = ('360p', '540p', '720p', '1080p')
|
||||
|
||||
_APP_INFO_DEFAULTS = {
|
||||
# unique "install id"
|
||||
'iid': None,
|
||||
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
|
||||
'app_name': 'musical_ly',
|
||||
'app_version': '34.1.2',
|
||||
'manifest_app_version': '2023401020',
|
||||
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
||||
'aid': '0',
|
||||
}
|
||||
_KNOWN_APP_INFO = [
|
||||
'7351144126450059040',
|
||||
'7351149742343391009',
|
||||
'7351153174894626592',
|
||||
]
|
||||
_APP_INFO_POOL = None
|
||||
_APP_INFO = None
|
||||
_APP_USER_AGENT = None
|
||||
|
||||
@property
def _API_HOSTNAME(self):
    """Hostname used when building mobile API request URLs.

    Looks up the ``api_hostname`` extractor argument under the ``TikTokIE``
    key, passing the built-in host as the default value, and returns the
    first entry of the resulting list.
    """
    default_hosts = ['api22-normal-c-useast2a.tiktokv.com']
    configured_hosts = self._configuration_arg(
        'api_hostname', default_hosts, ie_key=TikTokIE)
    return configured_hosts[0]
|
||||
|
||||
def _get_next_app_info(self):
|
||||
if self._APP_INFO_POOL is None:
|
||||
defaults = {
|
||||
key: self._configuration_arg(key, [default], ie_key=TikTokIE)[0]
|
||||
for key, default in self._APP_INFO_DEFAULTS.items()
|
||||
if key != 'iid'
|
||||
}
|
||||
app_info_list = (
|
||||
self._configuration_arg('app_info', ie_key=TikTokIE)
|
||||
or random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO)))
|
||||
self._APP_INFO_POOL = [
|
||||
{**defaults, **dict(
|
||||
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
|
||||
)} for app_info in app_info_list
|
||||
]
|
||||
|
||||
if not self._APP_INFO_POOL:
|
||||
return False
|
||||
|
||||
self._APP_INFO = self._APP_INFO_POOL.pop(0)
|
||||
|
||||
app_name = self._APP_INFO['app_name']
|
||||
version = self._APP_INFO['manifest_app_version']
|
||||
if app_name == 'musical_ly':
|
||||
package = f'com.zhiliaoapp.musically/{version}'
|
||||
else: # trill, aweme
|
||||
package = f'com.ss.android.ugc.{app_name}/{version}'
|
||||
self._APP_USER_AGENT = f'{package} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)'
|
||||
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _create_url(user_id, video_id):
|
||||
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
|
||||
|
@ -58,7 +105,7 @@ class TikTokBaseIE(InfoExtractor):
|
|||
'universal data', display_id, end_pattern=r'</script>', default={}),
|
||||
('__DEFAULT_SCOPE__', {dict})) or {}
|
||||
|
||||
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
|
||||
def _call_api_impl(self, ep, query, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
|
||||
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||
|
@ -67,80 +114,84 @@ class TikTokBaseIE(InfoExtractor):
|
|||
return self._download_json(
|
||||
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
|
||||
fatal=fatal, note=note, errnote=errnote, headers={
|
||||
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
|
||||
'User-Agent': self._APP_USER_AGENT,
|
||||
'Accept': 'application/json',
|
||||
}, query=query)
|
||||
|
||||
def _build_api_query(self, query, app_version, manifest_app_version):
|
||||
def _build_api_query(self, query):
|
||||
return {
|
||||
**query,
|
||||
'version_name': app_version,
|
||||
'version_code': manifest_app_version,
|
||||
'build_number': app_version,
|
||||
'manifest_version_code': manifest_app_version,
|
||||
'update_version_code': manifest_app_version,
|
||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||
'uuid': ''.join(random.choices(string.digits, k=16)),
|
||||
'_rticket': int(time.time() * 1000),
|
||||
'ts': int(time.time()),
|
||||
'device_brand': 'Google',
|
||||
'device_type': 'Pixel 7',
|
||||
'device_platform': 'android',
|
||||
'os': 'android',
|
||||
'ssmix': 'a',
|
||||
'_rticket': int(time.time() * 1000),
|
||||
'cdid': str(uuid.uuid4()),
|
||||
'channel': 'googleplay',
|
||||
'aid': self._APP_INFO['aid'],
|
||||
'app_name': self._APP_INFO['app_name'],
|
||||
'version_code': ''.join((f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.'))),
|
||||
'version_name': self._APP_INFO['app_version'],
|
||||
'manifest_version_code': self._APP_INFO['manifest_app_version'],
|
||||
'update_version_code': self._APP_INFO['manifest_app_version'],
|
||||
'ab_version': self._APP_INFO['app_version'],
|
||||
'resolution': '1080*2400',
|
||||
'dpi': 420,
|
||||
'os_version': '13',
|
||||
'os_api': '29',
|
||||
'carrier_region': 'US',
|
||||
'sys_region': 'US',
|
||||
'region': 'US',
|
||||
'app_name': self._APP_NAME,
|
||||
'app_language': 'en',
|
||||
'device_type': 'Pixel 7',
|
||||
'device_brand': 'Google',
|
||||
'language': 'en',
|
||||
'timezone_name': 'America/New_York',
|
||||
'timezone_offset': '-14400',
|
||||
'channel': 'googleplay',
|
||||
'os_api': '29',
|
||||
'os_version': '13',
|
||||
'ac': 'wifi',
|
||||
'mcc_mnc': '310260',
|
||||
'is_my_cn': 0,
|
||||
'aid': self._AID,
|
||||
'ssmix': 'a',
|
||||
'as': 'a1qwert123',
|
||||
'cp': 'cbfhckdckkde1',
|
||||
'is_pad': '0',
|
||||
'current_region': 'US',
|
||||
'app_type': 'normal',
|
||||
'sys_region': 'US',
|
||||
'last_install_time': int(time.time()) - random.randint(86400, 1123200),
|
||||
'timezone_name': 'America/New_York',
|
||||
'residence': 'US',
|
||||
'app_language': 'en',
|
||||
'timezone_offset': '-14400',
|
||||
'host_abi': 'armeabi-v7a',
|
||||
'locale': 'en',
|
||||
'ac2': 'wifi5g',
|
||||
'uoo': '1',
|
||||
'op_region': 'US',
|
||||
'build_number': self._APP_INFO['app_version'],
|
||||
'region': 'US',
|
||||
'ts': int(time.time()),
|
||||
'iid': self._APP_INFO['iid'],
|
||||
'device_id': random.randint(7250000000000000000, 7351147085025500000),
|
||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||
}
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
if not self._WORKING_APP_VERSION:
|
||||
app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
||||
manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
||||
if app_version and manifest_app_version:
|
||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
||||
self.write_debug('Imported app version combo from extractor arguments')
|
||||
elif app_version or manifest_app_version:
|
||||
self.report_warning('Only one of the two required version params are passed as extractor arguments', only_once=True)
|
||||
if not self._APP_INFO and not self._get_next_app_info():
|
||||
message = 'No working app info is available'
|
||||
if fatal:
|
||||
raise ExtractorError(message, expected=True)
|
||||
else:
|
||||
self.report_warning(message)
|
||||
return
|
||||
|
||||
if self._WORKING_APP_VERSION:
|
||||
app_version, manifest_app_version = self._WORKING_APP_VERSION
|
||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
||||
return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
||||
|
||||
for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
|
||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
||||
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
|
||||
for count in itertools.count(1):
|
||||
self.write_debug(str(self._APP_INFO))
|
||||
real_query = self._build_api_query(query)
|
||||
try:
|
||||
res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
||||
return res
|
||||
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||
if count == len(self._APP_VERSIONS):
|
||||
message = str(e.cause or e.msg)
|
||||
if not self._get_next_app_info():
|
||||
if fatal:
|
||||
raise e
|
||||
raise
|
||||
else:
|
||||
self.report_warning(str(e.cause or e.msg))
|
||||
self.report_warning(message)
|
||||
return
|
||||
self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS)))
|
||||
self.report_warning(f'{message}. Retrying... (attempt {count} of {max_tries})')
|
||||
continue
|
||||
raise e
|
||||
raise
|
||||
|
||||
def _extract_aweme_app(self, aweme_id):
|
||||
feed_list = self._call_api(
|
||||
|
|
Loading…
Reference in New Issue