$domains) {
            if (in_array($upstream, $domains) || in_array(stripWww($upstream), $domains)) {
                return $platform;
            }
        }
    }
    // check for a known fake subdomain (eg. region-code.example.com)
    foreach (PLATFORMS_FAKESUBDOMAINS as $domain) {
        // currently doesn't handle formats like www.region-code.example.com
        if (lstrip($upstream, '.', 1) === $domain) {
            return platformFromDomain($domain);
        }
    }
    return null; // domain unsupported
}

/**
 * Resolves an upstream identifier to a platform name.
 *
 * Tries, in order: the lowercased value as an exact platform name, then
 * platformFromAlias(), then platformFromDomain().
 *
 * @return string|null The resolved platform, or null when every lookup yields null.
 */
function platformFromUpstream(string $upstream): string|null {
    $upstreamLow = strtolower($upstream);
    if (isExactPlatformName($upstreamLow)) {
        return $upstreamLow;
    }
    // The domain resolver is spelled "platformFromDomain" (it calls itself by that
    // name); the previous "platfromFromDomain" spelling referenced a different name.
    return platformFromAlias($upstream) ?? platformFromDomain($upstream);
}

/**
 * Tells whether the normalized form of $platform is a value of $array.
 */
function inPlatformArray(string $platform, array $array): bool {
    return in_array(normalizePlatform($platform), $array);
}

/**
 * Looks up $array by the normalized form of $platform.
 *
 * @return mixed The stored value, or null when the key is missing.
 */
function platformMapGet(string $platform, array $array): mixed {
    return $array[normalizePlatform($platform)] ?? null;
}

/**
 * Drops the first $num pieces of $str (split on $sub) and rejoins the rest with $sub.
 *
 * Example: lstrip('a.b.c', '.', 1) gives 'b.c'.
 */
function lstrip(string $str, string $sub, int $num): string {
    return implode($sub, array_slice(explode($sub, $str), $num));
}

/**
 * Returns the last '/'-separated segment of the path component of $url.
 *
 * @return string The last segment; an empty string when the URL has no path.
 */
function urlLast(string $url): string {
    // parse_url() gives null/false when there is no usable path; cast so trim()
    // always receives a string.
    $tmp = explode('/', trim((string) parse_url($url, PHP_URL_PATH), '/'));
    return end($tmp);
}

/**
 * Tells whether $str starts with "http://" or "https://" (case-insensitive).
 */
function isAbsoluteUrl(string $str): bool {
    $strlow = strtolower($str);
    return (str_starts_with($strlow, 'http://') || str_starts_with($strlow, 'https://'));
}

/**
 * Strips the scheme from an absolute http(s) URL.
 *
 * @return string|null Everything after the first "://", or null when $str is not
 *                     an absolute http(s) URL.
 */
function parseAbsoluteUrl(string $str): string|null {
    return (isAbsoluteUrl($str) ? lstrip($str, '://', 1) : null);
}

/**
 * Builds an absolute URL to this script from the request protocol, SERVER_NAME
 * and SCRIPT_NAME, followed by $str.
 */
function makeSelfUrl(string $str = ''): string {
    return getRequestProtocol() . '://' . $_SERVER['SERVER_NAME'] . SCRIPT_NAME . $str;
}

/**
 * Sends a Location header for $url and terminates the script.
 *
 * Absolute http(s) URLs are sent untouched. Any other URL is prefixed with
 * SCRIPT_NAME and, when the "history" option reads as false/null and the URL does
 * not already carry a "proxatore-history" parameter, gets
 * "proxatore-history=false" appended.
 */
function redirectTo(string $url): void {
    $absolute = parseAbsoluteUrl($url);
    // Previously also considered:
    // && !(str_contains($url, '?proxatore-history=false') || str_contains($url, '&proxatore-history=false'))
    if (!$absolute && !readProxatoreBool('history')) {
        // parse_url() returns null when there is no query string.
        parse_str((string) parse_url($url, PHP_URL_QUERY), $params);
        if (!isset($params['proxatore-history'])) {
            $url = $url . (str_contains($url, '?') ? '&' : '?') . 'proxatore-history=false';
        }
    }
    // if ($_SERVER['REQUEST_METHOD'] === 'GET' || $absolute) {
    header('Location: ' . ($absolute ? '' : SCRIPT_NAME) . $url);
    // } else if ($_SERVER['REQUEST_METHOD'] === 'POST') {
    //     echo postRequest(SCRIPT_NAME, 'proxatore-url=' . str_replace('?', '&', $url));
    // }
    die();
}

/**
 * Returns REQUEST_SCHEME when the server provides it; otherwise 'https' when
 * $_SERVER['HTTPS'] is 'on' and 'http' in every other case.
 */
function getRequestProtocol(): string {
    return $_SERVER['REQUEST_SCHEME'] ?? (($_SERVER['HTTPS'] ?? null) === 'on' ? 'https' : 'http');
}

/**
 * Fetches $url with cURL, following redirects.
 *
 * @param int $redirects Value passed to CURLOPT_MAXREDIRS (-1 by default).
 * @return array 'body' is the curl_exec() result (false on failure), 'code' the
 *               HTTP status, 'url' the pending redirect URL if cURL reports one,
 *               otherwise the effective URL.
 */
function fetchContent(string $url, int $redirects = -1): array {
    $ch = curl_init();
    $useragent = 'curl/' . curl_version()['version']; // format the UA like curl CLI otherwise some sites can't behave
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, !ALLOW_NONSECURE_SSL);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, $redirects);
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    $data = [
        'body' => curl_exec($ch),
        'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE),
        'url' => curl_getinfo($ch, CURLINFO_REDIRECT_URL) ?: curl_getinfo($ch, CURLINFO_EFFECTIVE_URL),
        // 'error' => curl_error($ch),
    ];
    curl_close($ch);
    return $data;
}

/**
 * Joins a platform and a relative URL as "platform/relativeUrl".
 */
function makeInternalBareUrl(string $platform, string $relativeUrl): string {
    return "{$platform}/{$relativeUrl}";
}

/**
 * Builds the internal "platform/relativeurl" URL for an item.
 *
 * Accepts either an array holding a truthy 'result' sub-array (which is then
 * used) or an array that carries 'platform' and 'relativeurl' itself.
 */
function makeInternalItemUrl(array $item): string {
    // "?? null" lets arrays without a 'result' key through without a warning.
    if ($result = $item['result'] ?? null) {
        $item = $result;
    }
    return makeInternalBareUrl($item['platform'], $item['relativeurl']);
}

/**
 * Builds "https://<host>/<relativeUrl>", where host is the first PLATFORMS entry
 * for $platform, or $platform itself when that entry is missing or empty.
 */
function makeCanonicalBareUrl(string $platform, string $relativeUrl): string {
    $host = (PLATFORMS[$platform][0] ?? null) ?: $platform;
    return 'https://' . $host . '/' . $relativeUrl;
}

/**
 * Canonical URL for an array carrying 'platform' and 'relativeurl'.
 *
 * @return string|null Null when $item is null or empty.
 */
function makeCanonicalItemUrl(array|null $item): string|null {
    return ($item ? makeCanonicalBareUrl($item['platform'], $item['relativeurl']) : null);
}

/**
 * Builds the embed URL for a platform item.
 *
 * The first matching configuration wins: EMBEDS_PREFIXES_SIMPLE (prefix + last
 * path segment), EMBEDS_PREFIXES_PARAMS (template whose "[key]" placeholders are
 * filled from the query string), EMBEDS_PREFIXES_FULL (prefix + url-encoded
 * relative URL), EMBEDS_API (value read from $meta), otherwise
 * "<host>/<relativeUrl><suffix>".
 *
 * @param array|null $meta Page meta tags; only read by the EMBEDS_API branch.
 * @return string "https://"-prefixed URL, except in the EMBEDS_API branch, which
 *                returns the meta value as-is.
 */
function makeEmbedUrl(string $platform, string $relativeUrl, array|null $meta = null): string {
    $url = null;
    if (isset(EMBEDS_PREFIXES_SIMPLE[$platform])) {
        $url = EMBEDS_PREFIXES_SIMPLE[$platform] . urlLast($relativeUrl);
    } else if (isset(EMBEDS_PREFIXES_PARAMS[$platform])) {
        $url = EMBEDS_PREFIXES_PARAMS[$platform];
        // The query string does not change between iterations, so parse it once.
        parse_str((string) parse_url($relativeUrl, PHP_URL_QUERY), $params);
        foreach (PLATFORMS_PARAMS[$platform] as $key) {
            // A parameter absent from the URL blanks its placeholder.
            $url = str_replace("[{$key}]", $params[$key] ?? '', $url);
        }
    } else if (isset(EMBEDS_PREFIXES_FULL[$platform])) {
        $url = EMBEDS_PREFIXES_FULL[$platform] . urlencode($relativeUrl);
    } else if ($api = (EMBEDS_API[$platform] ?? null)) {
        // NOTE(review): yields null (a TypeError against the declared return type)
        // when $meta is omitted or lacks the key -- confirm callers always pass $meta
        // for EMBEDS_API platforms.
        return $meta[$api['meta']];
    // } else if ($api = EMBEDS_COMPLEX[$platform] ?? null) {
    //     return $api['prefix'] . makeCanonicalItemUrl(['platform' => $platform, 'relativeurl' => $relativeUrl]) . $api['suffix'];
    } else {
        $url = (EMBEDS_DOMAINS[$platform][0] ?? PLATFORMS[$platform][0] ?? PLATFORMS_PROXIES[$platform][0] ?? $platform)
            . '/' . trim($relativeUrl, '/') . (EMBEDS_SUFFIXES[$platform] ?? '');
    }
    return "https://{$url}";
}

/**
 * Builds the URL from which page data is scraped.
 *
 * Platforms listed in PLATFORMS_USEPROXY use their first PLATFORMS_PROXIES host
 * (falling back to the first PLATFORMS host); others use the first PLATFORMS
 * host. $platform itself is the last-resort host.
 */
function makeDataScrapeUrl(string $platform, string $relativeUrl): string {
    $mainHost = PLATFORMS[$platform][0] ?? null;
    if (inPlatformArray($platform, PLATFORMS_USEPROXY)) {
        $host = (PLATFORMS_PROXIES[$platform][0] ?? null) ?: $mainHost;
    } else {
        $host = $mainHost;
    }
    return 'https://' . ($host ?: $platform) . '/' . $relativeUrl;
}

/**
 * Returns the URL to scrape media from for a fetched item: the embed URL of
 * $item['result'], computed with $item['meta'].
 */
function makeMediaScrapeUrl(array $item): string {
    return /* $embedUrl = */ makeEmbedUrl($item['result']['platform'], $item['result']['relativeurl'], $item['meta']);
    // return (isAbsoluteUrl($embedUrl)
    //     ? $embedUrl
    //     // TODO: if we ever get at this point of the code, then the page has already been scraped and should not do it again for nothing...
    //     : makeDataScrapeUrl($platform, $relativeUrl));
}

/**
 * Collects the value of attribute $attr from every <$tag> element.
 *
 * @param DOMDocument|string $doc A parsed document, or HTML text to parse first.
 * @return array One entry per element (DOMElement::getAttribute() result).
 */
function getHtmlAttributes(DOMDocument|string $doc, string $tag, string $attr): array {
    if (is_string($doc)) {
        $doc = htmldom($doc);
    }
    $list = [];
    foreach ($doc->getElementsByTagName($tag) as $el) {
        $list[] = $el->getAttribute($attr);
    }
    return $list;
}

/**
 * Maps every <meta> element that has a "name" or "property" attribute to its
 * "content" attribute; "name" takes precedence as the key when non-empty.
 */
function parseMetaTags(DOMDocument $doc): array {
    $tags = [];
    foreach ($doc->getElementsByTagName('meta') as $meta) {
        if ($meta->hasAttribute('name') || $meta->hasAttribute('property')) {
            $tags[$meta->getAttribute('name') ?: $meta->getAttribute('property')] = $meta->getAttribute('content');
        }
    }
    return $tags;
}

/**
 * Parses HTML text (treated as UTF-8) into a DOMDocument, discarding libxml
 * parse errors.
 *
 * @return DOMDocument The parsed document; an empty document for an empty body.
 */
function htmldom(string $body): DOMDocument {
    libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    // DOMDocument::loadHTML() throws a ValueError on an empty string (PHP 8), which
    // is what a failed fetch produces, so leave the document empty in that case.
    if ($body !== '') {
        // Non-ASCII characters become numeric entities so libxml reads them
        // correctly; this replaces mb_convert_encoding(..., 'HTML-ENTITIES'),
        // which is deprecated since PHP 8.2.
        $doc->loadHTML(mb_encode_numericentity($body, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
    }
    libxml_clear_errors();
    return $doc;
}

/**
 * Extracts the first double-quoted string containing ".mp4" from $txt.
 *
 * The quoted value is HTML-entity-decoded and then JSON-unescaped twice.
 *
 * @return string|null The decoded value, or null when nothing usable is found.
 */
function getAnyVideoUrl(string $txt): string|null {
    // strpos() reports "not found" as false, not null, so the fallback to a plain
    // ".mp4" needs ?: -- with ?? it was never reached.
    $vidpos = strpos($txt, '.mp4?') ?: strpos($txt, '.mp4');
    if (!$vidpos) {
        return null;
    }
    $endpos = strpos($txt, '"', $vidpos);
    if ($endpos === false) {
        return null; // no closing quote: nothing delimits the value
    }
    $openQuote = strrpos(substr($txt, 0, $endpos), '"');
    $startpos = ($openQuote === false ? $endpos : $openQuote + 1);
    // The slice deliberately keeps the closing quote (hence the +1): the second
    // decode attempt below relies on it to terminate the JSON string.
    $vidstr = substr($txt, $startpos, $endpos - $startpos + 1);
    $vidstr = html_entity_decode($vidstr);
    $vidstr = json_decode('"' . json_decode('"' . $vidstr . '"'))
        ?: json_decode('"' . json_decode('"' . $vidstr) . '"');
    return $vidstr;
}

/**
 * Builds the result array for a page from its meta tags.
 *
 * For PLATFORMS_WEBVIDEO platforms without a video, 'video' becomes the canonical
 * page URL with type 'text/html'. Whenever the video type is 'text/html',
 * 'htmlvideo' points at the internal proxy endpoint, and with the "htmlmedia"
 * option on, 'video' is replaced by that endpoint with type 'video/mp4'.
 *
 * @param array $meta Meta tags as returned by parseMetaTags().
 */
function makeResultObject(string $platform, string $relativeUrl, array $meta): array {
    $data = [
        'platform' => $platform,
        'relativeurl' => $relativeUrl,
        //'datetime' => date('Y-m-d H:i:s'),
        //'request_time' => time(),
        'locale' => $meta['og:locale'] ?? '',
        'type' => $meta['og:type'] ?? '',
        'image' => $meta['og:image'] ?? '',
        'video' => $meta['og:video'] ?? $meta['og:video:url'] ?? '',
        'videotype' => $meta['og:video:type'] ?? '',
        'htmlvideo' => $meta['og:video'] ?? $meta['og:video:url'] ?? '',
        'audio' => $meta['og:audio'] ?? '',
        // NOTE(review): the fallback used to repeat 'og:title'; a different key was
        // presumably intended -- confirm which one.
        'title' => $meta['og:title'] ?? '',
        //'author' => $meta['og:site_name'] ?? '',
        'description' => $meta['og:description'] ?? $meta['description'] ?? '',
        'images' => [],
    ];
    if (inPlatformArray($platform, PLATFORMS_WEBVIDEO) && !$data['video']) {
        $data['video'] = makeCanonicalItemUrl($data);
        $data['videotype'] = 'text/html';
    }
    if ($data['video'] && $data['videotype'] === 'text/html') {
        $useFileProxy = inPlatformArray($platform, PLATFORMS_WEBVIDEO)
            || readProxatoreBool('mediaproxy')
            || (getQueryArray()['proxatore-mediaproxy'] ?? null) === 'video';
        $proxy = ($useFileProxy ? 'file' : '');
        $data['htmlvideo'] = SCRIPT_NAME . "__{$proxy}proxy__/{$platform}/{$data['video']}";
        if (readProxatoreBool('htmlmedia')) {
            $data['video'] = $data['htmlvideo'];
            $data['videotype'] = 'video/mp4';
        }
    }
    return $data;
}

/**
 * Rebuilds $url as its path plus only the query parameters listed in
 * PLATFORMS_PARAMS[$platform].
 */
function makeParamsRelativeUrl(string $platform, string $url): string {
    parse_str((string) parse_url($url, PHP_URL_QUERY), $params);
    $url = parse_url($url, PHP_URL_PATH) . '?';
    foreach ($params as $key => $value) {
        if (in_array($key, PLATFORMS_PARAMS[$platform])) {
            // NOTE(review): parse_str() url-decodes values and they are written back
            // unencoded -- confirm kept parameters never need escaping.
            $url .= "{$key}={$value}&";
        }
    }
    return rtrim($url, '?&');
}

/**
 * Returns the request parameters; currently always $_GET.
 */
function getQueryArray(): array {
    // switch ($_SERVER['REQUEST_METHOD']) {
    //     case 'GET':
    return $_GET;
    //     case 'POST':
    //         return $_POST;
    // }
}

/**
 * Reads $key from $array as a boolean.
 *
 * @param bool|null  $default Returned when the key is missing or an empty string.
 * @param array|null $array   Source array; the query array when null or empty.
 * @return bool|null filter_var(FILTER_VALIDATE_BOOLEAN) of the value, or $default.
 */
function readBoolParam(string $key, bool|null $default = null, array|null $array = null): bool|null {
    if (!$array) {
        $array = getQueryArray();
    }
    $value = $array[$key] ?? null;
    // Compare explicitly instead of by truthiness: "0" is a meaningful (false)
    // value and used to be mistaken for "not provided".
    if ($value !== null && $value !== '') {
        return filter_var($value, FILTER_VALIDATE_BOOLEAN);
    }
    return $default;
}

/**
 * Reads the boolean option "proxatore-<key>", defaulting to OPTIONS_DEFAULTS[$key].
 */
function readProxatoreBool(string $key, array|null $array = null): bool|null {
    return readBoolParam("proxatore-{$key}", OPTIONS_DEFAULTS[$key] ?? null, $array);
    // TODO handle domain HTTP referer overrides
}

/**
 * Reads the raw option "proxatore-<key>", falling back to OPTIONS_DEFAULTS[$key]
 * and then null.
 *
 * @param array|null $array Source array; the query array when null or empty.
 */
function readProxatoreParam(string $key, array|null $array = null): string|null {
    if (!$array) {
        $array = getQueryArray();
    }
    return ($array["proxatore-{$key}"] ?? OPTIONS_DEFAULTS[$key] ?? null);
}

/**
 * Fetches and parses the page for a platform item.
 *
 * The relative URL stored in the result keeps only the path, unless the platform
 * has a PLATFORMS_PARAMS entry: `true` keeps it untouched, a list keeps just the
 * listed query parameters.
 *
 * @return array|null fetchContent() data extended with 'doc', 'meta' and 'result';
 *                    null when an argument is empty or the fetched body is empty.
 */
function getPageData($platform, $relativeUrl): array|null {
    if ($platform && $relativeUrl && ($data = fetchContent(makeDataScrapeUrl($platform, $relativeUrl)))['body']) {
        // if (!in_array($platform, PLATFORMS_TRACKING)) {
        //     $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH);
        // }
        if (isset(PLATFORMS_PARAMS[$platform])) {
            if (PLATFORMS_PARAMS[$platform] !== true) {
                $relativeUrl = makeParamsRelativeUrl($platform, $relativeUrl);
            }
        } else {
            // parse_url() returns null for a URL without a path; makeResultObject()
            // requires a string.
            $relativeUrl = (string) parse_url($relativeUrl, PHP_URL_PATH);
        }
        $data['doc'] = htmldom($data['body']);
        $data['meta'] = parseMetaTags($data['doc']);
        $data['result'] = makeResultObject($platform, $relativeUrl, $data['meta']);
        return $data;
    }
    return null;
}

/**
 * Resolves a redirecting upstream to an internal URL on its target platform.
 *
 * Fetches "<upstream>/<relativeUrl>" allowing one redirect, drops the first three
 * '/'-separated pieces of the URL that fetch reports, and prefixes the rest with
 * PLATFORMS_REDIRECTS[$upstream].
 */
function getPlatformRedirectionUrl($upstream, $relativeUrl) {
    // TODO: strip query params for platforms that don't need them
    $resolved = fetchContent(makeInternalBareUrl($upstream, $relativeUrl), 1)['url'];
    return makeInternalBareUrl(
        PLATFORMS_REDIRECTS[$upstream],
        trim(lstrip($resolved, '/', 3), '/'));
}

/**
 * Sends an HTTP POST through file_get_contents() with a stream context.
 *
 * @param array|null $headers Raw header lines.
 * @return string|false The response body, or false on failure.
 */
function postRequest(string $url, string $body, array|null $headers = null): string|false {
    return file_get_contents($url, false, stream_context_create(['http' => [
        'header' => $headers,
        'method' => 'POST',
        'content' => $body,
    ]]));
}

/**
 * Asks the Cobalt API (COBALT_API) for an .mp4 stream of $url.
 *
 * @return string|null The direct URL for a "redirect" answer, an internal
 *                     "__cobaltproxy__" URL for a "tunnel" answer, otherwise null
 *                     (including request or decoding failures).
 */
function getCobaltVideo(string $url): string|null {
    $response = postRequest(COBALT_API, json_encode(['url' => $url]), [
        'Accept: application/json',
        'Content-Type: application/json',
    ]);
    $cobaltData = ($response === false ? null : json_decode($response));
    if (!is_object($cobaltData)) {
        return null; // request failed or the answer was not a JSON object
    }
    $status = $cobaltData->status ?? null;
    $streamUrl = (string) ($cobaltData->url ?? '');
    $filename = (string) ($cobaltData->filename ?? '');
    // str_contains() also accepts ".mp4" at offset 0, which the previous truthy
    // strpos() check rejected.
    if ($status === 'redirect' && str_contains($streamUrl, '.mp4')) {
        return $streamUrl;
    }
    if ($status === 'tunnel' && str_contains($filename, '.mp4')) {
        return SCRIPT_NAME . '__cobaltproxy__/_/' . lstrip($streamUrl, '/', 3);
    }
    return null;
}

/**
 * Completes $item['result'] with media data, in place.
 *
 * Platforms with a PLATFORMS_API entry are read from JSON (fetched from the
 * entry's 'url', or taken from the element with id 'id' on the scraped page) or
 * from the text of the first 'tag' element. Other platforms get a video URL
 * (Cobalt when enabled for the platform, else the first .mp4 found in the page)
 * and, unless listed in PLATFORMS_NOIMAGES, every <img src> of the page.
 */
function fetchPageMedia(array &$item): void {
    $platform = $item['result']['platform'];
    $relativeUrl = $item['result']['relativeurl'];
    if ($api = platformMapGet($platform, PLATFORMS_API)) {
        $json = null;
        if ($apiUrl = $api['url'] ?? null) {
            $json = fetchContent($apiUrl . urlLast($relativeUrl))['body'];
        } else {
            $doc = htmldom((string) fetchContent(makeMediaScrapeUrl($item))['body']);
            if ($id = $api['id'] ?? null) {
                // getElementById() returns null when the element is missing.
                $json = $doc->getElementById($id)?->textContent;
            } else if ($tag = $api['tag'] ?? null) {
                $item['result']['description'] = $doc->getElementsByTagName($tag)[0]->textContent ?? '';
                return;
            }
        }
        $data = json_decode((string) $json, true);
        $values = [];
        foreach ($api['data'] as $key => $query) {
            // SECURITY: eval() of an accessor expression taken from PLATFORMS_API.
            // Acceptable only while that constant is trusted, static configuration;
            // it must never contain request-derived text. The evaluated code reads
            // the local variable $data, so that name must not change.
            $values[$key] = eval("return \$data{$query};");
        }
        $item['result'] = array_merge($item['result'], $values);
    } else {
        $cobaltVideo = null;
        if (COBALT_API && inPlatformArray($platform, PLATFORMS_COBALT)) {
            $cobaltVideo = getCobaltVideo($item['url']);
        }
        $html = (string) fetchContent(makeMediaScrapeUrl($item))['body'];
        if (!$item['result']['video']) {
            $item['result']['video'] = $cobaltVideo ?? getAnyVideoUrl($html) ?? '';
        }
        if (!inPlatformArray($platform, PLATFORMS_NOIMAGES) /* !$immediateResult['image'] */) {
            $item['result']['images'] = getHtmlAttributes($html, 'img', 'src');
            // if (sizeof($immediateResult['images'])) {
            //     //$immediateResult['image'] = $imgs[0];
            // }
        }
    }
}

/**
 * Asks yt-dlp for the direct stream URLs of an absolute http(s) URL.
 *
 * The URL is reduced to the characters A-Z a-z 0-9 - _ / . before being handed
 * to the shell, and is always requested over https.
 *
 * @param string $options Raw extra yt-dlp arguments; inserted into the command
 *                        line as-is, so callers must quote them themselves.
 * @return array|null The output lines of "yt-dlp -g" (a single empty string when
 *                    it printed nothing); null when $absoluteUrl is not an
 *                    absolute http(s) URL or nothing is left after filtering.
 */
function getWebStreamUrls(string $absoluteUrl, string $options = ''): array|null {
    if (($url = parseAbsoluteUrl($absoluteUrl)) && ($url = preg_replace('/[^A-Za-z0-9-_\/\.]/', '', $url))) {
        // escapeshellarg() instead of hand-written quotes, so the argument stays
        // safe even if the character filter above is ever relaxed.
        $command = "yt-dlp {$options} -g " . escapeshellarg("https://{$url}");
        // shell_exec() returns null/false when the command fails or prints nothing.
        return explode("\n", trim((string) shell_exec($command)));
    }
    return null;
}

/**
 * Resolves a direct mp4 stream URL for a YouTube video, taking the video id from
 * the last 11 characters of $relativeUrl (filtered to A-Z a-z 0-9 - _).
 *
 * @return string The stream URL, or an empty string when no id or no stream
 *                could be determined.
 */
function getYoutubeStreamUrl(string $relativeUrl): string {
    if ($video = preg_replace('/[^A-Za-z0-9-_]/', '', substr($relativeUrl, -11))) {
        return getWebStreamUrls("https://youtu.be/{$video}", '-f mp4')[0] ?? '';
    }
    // This path used to fall off the end of the function, which is a TypeError
    // for a function declared to return string.
    return '';
}

/**
 * Streams the media behind $absoluteUrl to the client as video/mp4, remuxed by
 * ffmpeg from the URLs yt-dlp reports, then terminates the script.
 */
function ffmpegStream(string $absoluteUrl): void {
    $userAgent = escapeshellarg(USER_AGENT);
    if ($urls = getWebStreamUrls($absoluteUrl, "--user-agent {$userAgent}")) {
        $inputs = '';
        foreach ($urls as $url) {
            // yt-dlp output is external data: skip blank lines and shell-escape
            // each URL instead of wrapping it in literal quotes.
            if ($url !== '') {
                $inputs .= ' -i ' . escapeshellarg($url) . ' ';
            }
        }
        if ($inputs !== '') {
            header('Content-Type: video/mp4');
            passthru("ffmpeg -user_agent {$userAgent} {$inputs} -c:v copy -f ismv -");
        }
    }
    die();
}

/**
 * Sends $url's content to the client with the given Content-Type, then
 * terminates the script.
 */
function streamFile(string $url, string $mime): void {
    header("Content-Type: {$mime}");
    readfile($url);
    die();
}

// TODO: redesign the endpoint names, they're kind of a mess
/**
 * Dispatches a "__<api>__/<platform>/<relative url...>" request and terminates
 * the script.
 *
 * - proxy / media: redirect to the YouTube stream URL, or (media with a trailing
 *   "/0" segment) to the page's video or image.
 * - fileproxy: stream the media through this server.
 * - cobaltproxy: stream from COBALT_API.
 * - embed: redirect to the platform's embed URL.
 *
 * @param array $segments Path segments; $segments[0] is the endpoint name wrapped
 *                        in two characters on each side.
 */
function handleApiRequest(array $segments): void {
    $api = substr($segments[0], 2, -2);
    $platform = $segments[1];
    $relativeUrl = implode('/', array_slice($segments, 2));
    if (($api === 'proxy' || $api === 'media')) {
        if ($platform === 'youtube') {
            header('Location: ' . getYoutubeStreamUrl($relativeUrl));
        } else if ($api === 'media' && end($segments) === '0') {
            $relativeUrl = substr($relativeUrl, 0, -2); // drop the trailing "/0"
            // getPageData() returns null when the page could not be fetched.
            $data = getPageData($platform, $relativeUrl)['result'] ?? null;
            if ($data && ($url = ($data['video'] ?: $data['image']))) {
                header('Location: ' . $url);
            }
        }
    } else if ($api === 'fileproxy') {
        switch ($platform) {
            case 'youtube':
                streamFile(getYoutubeStreamUrl($relativeUrl), 'video/mp4');
                break;
            default:
                ffmpegStream(makeCanonicalBareUrl($platform, lstrip($relativeUrl, '/', 3)));
        }
    } else if ($api === 'cobaltproxy') {
        streamFile(COBALT_API . $relativeUrl, 'video/mp4');
    } else if ($api === 'embed') {
        header('Location: ' . makeEmbedUrl($platform, $relativeUrl));
    }
    die();
}

/**
 * Runs the http/https/ftp/ftps URL pattern over $text via preg_replace().
 *
 * NOTE(review): the replacement is '$0' (the match itself), so as written the
 * text comes back unchanged; presumably anchor markup around $0 was intended --
 * confirm against the intended output before changing it.
 */
function linkifyUrls(string $text): string {
    return preg_replace(
        '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/',
        '$0',
        $text);
}