From b68e4e6dc2cac49ebe408f6bf46bac32749d7519 Mon Sep 17 00:00:00 2001 From: octt <6083316-octospacc@users.noreply.gitlab.com> Date: Tue, 13 May 2025 01:28:52 +0200 Subject: [PATCH] Replace Proxatore.php --- Proxatore.php | 551 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 371 insertions(+), 180 deletions(-) diff --git a/Proxatore.php b/Proxatore.php index 3528a4e..401bd04 100644 --- a/Proxatore.php +++ b/Proxatore.php @@ -1,5 +1,31 @@ . + */ + +/*********** Configuration ***********/ + +const APP_NAME = 'ποΈ Proxatore'; +const APP_DESCRIPTION = 'a content proxy for viewing and embedding media and text from various platforms.'; + +// if you make changes to the source code, please modify this to point to your modified version +const SOURCE_CODE = 'https://hlb0.octt.eu.org/Drive/Misc/Scripts/Proxatore.php'; + +// cobalt API server URL; set to false or null or '' to avoid using cobalt const COBALT_API = 'http://192.168.1.125:9010/'; const OPTIONS_DEFAULTS = [ @@ -8,56 +34,79 @@ const OPTIONS_DEFAULTS = [ 'htmlmedia' => false, 'relativemedia' => false, 'mediaproxy' => false, + 'viewmode' => 'normal', ]; -const OPTIONS_OVERRIDES = [ - 'bbs.spacc.eu.org' => [ - 'embedfirst' => true, - ], -]; +const GOOGLE_VERIFICATION = 'HjNf-db8xb7lkRNgD3Q8-qeF1lWsbxmCZptRyjLBnrI'; +const BING_VERIFICATION = '45DC0FC265FF4059D48677970BE86150'; + +define('USER_AGENT', "Proxatore/2025/1 ({$_SERVER['SERVER_NAME']})"); +//define('USER_AGENT', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'); + +/*************************************/ + +//define('SCRIPT_NAME', $_SERVER['SCRIPT_NAME'] /* '/' */); +define('SCRIPT_NAME', ($_SERVER['SCRIPT_NAME'] === '/' ? $_SERVER['SCRIPT_NAME'] : "{$_SERVER['SCRIPT_NAME']}/")); +define('HISTORY_FILE', './Proxatore.history.jsonl'); + +// const OPTIONS_OVERRIDES = [ +// 'bbs.spacc.eu.org' => [ +// 'embedfirst' => true, +// ], +// ]; const PLATFORMS = [ - 'spaccbbs' => ['bbs.spacc.eu.org'], + 'spaccbbs' => ['bbs.spacc.eu.org'], 'bluesky' => ['bsky.app'], - 'facebook' => ['facebook.com', 'm.facebook.com'], - 'instagram' => ['instagram.com'], - //'juxt' => ['juxt.pretendo.network'], - 'reddit' => ['old.reddit.com', 'reddit.com'], - 'spotify' => ['open.spotify.com'], - 'telegram' => ['t.me', 'telegram.me'], + 'facebook' => ['facebook.com', 'm.facebook.com'], + 'instagram' => ['instagram.com'], + //'juxt' => ['juxt.pretendo.network'], + 'raiplay' => ['raiplay.it'], + 'reddit' => ['old.reddit.com', 'reddit.com'], + 'spotify' => ['open.spotify.com'], + 'telegram' => ['t.me', 'telegram.me'], 'threads' => ['threads.net', 'threads.com'], - 'tiktok' => ['tiktok.com'], - 'twitter' => ['twitter.com'], - 'x' => ['x.com'], - 'xiaohongshu' => ['xiaohongshu.com'], - 'youtube' => ['youtube.com', 'm.youtube.com'], + 'tiktok' => ['tiktok.com'], + 'twitter' => ['twitter.com'], + 'x' => ['x.com'], + 'xiaohongshu' => ['xiaohongshu.com'], + 'youtube' => ['youtube.com', 'm.youtube.com'], ]; const PLATFORMS_USERSITES = ['altervista.org', 'blogspot.com', 'wordpress.com']; const PLATFORMS_ALIASES = [ - 'x' => 'twitter', + 'x' => 'twitter', ]; const PLATFORMS_PROXIES = [ 'bluesky' => ['fxbsky.app'], - 'instagram' => ['ddinstagram.com', 'd.ddinstagram.com', 'kkinstagram.com'], + 'instagram' => ['ddinstagram.com', 'd.ddinstagram.com', 'kkinstagram.com'], 'threads' => ['vxthreads.net'], - 'tiktok' => ['vxtiktok.com'], - 'twitter' => ['fxtwitter.com', 'vxtwitter.com', 'fixvx.com'], - 'x' => ['fixupx.com', 'girlcockx.com', 'stupidpenisx.com'], + 'tiktok' => ['vxtiktok.com'], + 'twitter' => ['fxtwitter.com', 'vxtwitter.com', 'fixvx.com'], + 'x' => ['fixupx.com', 'girlcockx.com', 'stupidpenisx.com'], ]; const PLATFORMS_REDIRECTS = [ - 'vm.tiktok.com' => 'tiktok', - //'youtu.be' => 'youtube', + 'vm.tiktok.com' => 'tiktok', + 'youtu.be' => 'youtube', ]; const PLATFORMS_API = [ - 'tiktok' => ['https://www.tiktok.com/player/api/v1/items?item_ids=', [ - 'description' => "['items'][0]['desc']", - 'video' => "['items'][0]['video_info']['url_list'][0]", - ]], + 'spotify' => [ + 'id' => '__NEXT_DATA__', + 'data' => [ + 'audio' => "['props']['pageProps']['state']['data']['entity']['audioPreview']['url']", + ], + ], + 'tiktok' => [ + 'url' => 'https://www.tiktok.com/player/api/v1/items?item_ids=', + 'data' => [ + 'description' => "['items'][0]['desc']", + 'video' => "['items'][0]['video_info']['url_list'][0]", + ], + ], ]; const PLATFORMS_COBALT = ['instagram']; @@ -70,6 +119,8 @@ const PLATFORMS_ORDERED = ['telegram']; //const PLATFORMS_VIDEO = ['facebook', 'instagram']; +const PLATFORMS_WEBVIDEO = ['raiplay']; + const PLATFORMS_NOIMAGES = ['altervista.org', 'wordpress.com']; const PLATFORMS_PARAMS = [ @@ -79,32 +130,28 @@ const PLATFORMS_PARAMS = [ ]; const EMBEDS = [ - 'spotify' => ['open.spotify.com/embed/'], - 'reddit' => ['embed.reddit.com'], + 'spotify' => ['open.spotify.com/embed/'], + 'reddit' => ['embed.reddit.com'], ]; const EMBEDS_PREFIXES_SIMPLE = [ - 'tiktok' => 'www.tiktok.com/embed/v3/', - 'twitter' => 'platform.twitter.com/embed/Tweet.html?id=', + 'tiktok' => 'www.tiktok.com/embed/v3/', + 'twitter' => 'platform.twitter.com/embed/Tweet.html?id=', ]; const EMBEDS_PREFIXES_PARAMS = [ - 'youtube' => 'www.youtube.com/embed/[v]', + 'youtube' => 'www.youtube.com/embed/[v]', ]; const EMBEDS_SUFFIXES = [ - 'instagram' => '/embed/captioned/', - 'telegram' => '?embed=1&mode=tme', + 'instagram' => '/embed/captioned/', + 'telegram' => '?embed=1&mode=tme', ]; define('EMBEDS_PREFIXES_FULL', [ - 'facebook' => 'www.facebook.com/plugins/post.php?href=' . urlencode('https://www.facebook.com/'), + 'facebook' => 'www.facebook.com/plugins/post.php?href=' . urlencode('https://www.facebook.com/'), ]); -define('APP_SLUG', explode(' ', APPNAME)[1]); -define('SCRIPT_NAME', /* $_SERVER['SCRIPT_NAME'] . */ '/'); -define('HISTORY_FILE', './' . APP_SLUG . '.history.jsonl'); - function normalizePlatform(string $platform): string { if (str_contains($platform, '.')) { $platform = lstrip($platform, '.', -2); //implode('.', array_slice(explode('.', $platform), -2)); @@ -121,29 +168,33 @@ function platformMapGet(string $platform, array $array): mixed { } function lstrip(string $str, string $sub, int $num): string { - return implode($sub, array_slice(explode($sub, $str), $num)); + return implode($sub, array_slice(explode($sub, $str), $num)); } function urlLast(string $url): string { - return end(explode('/', trim(parse_url($url, PHP_URL_PATH), '/'))); + return end(explode('/', trim(parse_url($url, PHP_URL_PATH), '/'))); } function parseAbsoluteUrl(string $str) { $strlow = strtolower($str); if (str_starts_with($strlow, 'http://') || str_starts_with($strlow, 'https://')) { - return implode('://', array_slice(explode('://', $str), 1)); + return lstrip($str, '://', 1); //implode('://', array_slice(explode('://', $str), 1)); } } function redirectTo($url): void { - if (!($absolute = parseAbsoluteUrl($url)) && !readProxatoreParam('history') /* && !(str_contains($url, '?proxatore-history=false') || str_contains($url, '&proxatore-history=false')) */) { + if (!($absolute = parseAbsoluteUrl($url)) && !readProxatoreBool('history') /* && !(str_contains($url, '?proxatore-history=false') || str_contains($url, '&proxatore-history=false')) */) { parse_str(parse_url($url, PHP_URL_QUERY), $params); if (!isset($params['proxatore-history'])) { $url = $url . (str_contains($url, '?') ? '&' : '?') . 'proxatore-history=false'; } } - header('Location: ' . ($absolute ? '' : SCRIPT_NAME) . $url); - die(); + // if ($_SERVER['REQUEST_METHOD'] === 'GET' || $absolute) { + header('Location: ' . ($absolute ? '' : SCRIPT_NAME) . $url); + // } else if ($_SERVER['REQUEST_METHOD'] === 'POST') { + // echo postRequest(SCRIPT_NAME, 'proxatore-url=' . str_replace('?', '&', $url)); + // } + die(); } function fetchContent(string $url, int $redirects=-1): array { @@ -185,17 +236,9 @@ function makeEmbedUrl(string $platform, string $relativeUrl): string { } else if (isset(EMBEDS_PREFIXES_FULL[$platform])) { $url = EMBEDS_PREFIXES_FULL[$platform] . urlencode($relativeUrl); } else { - $url = (EMBEDS[$platform][0] ?: PLATFORMS[$platform][0] ?: PLATFORMS_PROXIES[$platform][0] ?: $platform) . '/' . trim($relativeUrl, '/') . (EMBEDS_SUFFIXES[$platform] ?? ''); + $url = (EMBEDS[$platform][0] ?? PLATFORMS[$platform][0] ?? PLATFORMS_PROXIES[$platform][0] ?? $platform) . '/' . trim($relativeUrl, '/') . (EMBEDS_SUFFIXES[$platform] ?? ''); } return "https://{$url}"; -// switch ($platform) { -// case 'tiktok': -// return 'https://www.tiktok.com/embed/v3/' . urlLast($relativeUrl); -// case 'twitter': -// return 'https://platform.twitter.com/embed/Tweet.html?id=' . urlLast($relativeUrl); -// default: -// return 'https://' . (EMBEDS[$platform][0] ?: PLATFORMS_PROXIES[$platform][0] ?: PLATFORMS[$platform][0] ?: '') . '/' . $relativeUrl . (EMBEDS_SUFFIXES[$platform] ?? ''); -// } } function makeScrapeUrl(string $platform, string $relativeUrl): string { @@ -237,7 +280,7 @@ function loadHistory(): array { return $history; } -function saveHistory($entry): void { +function saveHistory(array $entry): void { if (inPlatformArray($entry['platform'], PLATFORMS_FAKE404)) { $history = searchExactHistory($entry['platform'], implode('/', array_slice(explode('/', $entry['relativeurl']), -1))); if (sizeof($history)) { @@ -291,8 +334,10 @@ function searchExactHistory(string $platform, string $relativeUrl): array { } function htmldom(string $body): DOMDocument { + libxml_use_internal_errors(true); $doc = new DOMDocument(); $doc->loadHTML(mb_convert_encoding($body, 'HTML-ENTITIES', 'UTF-8')); + libxml_clear_errors(); return $doc; } @@ -309,7 +354,7 @@ function getAnyVideoUrl(string $txt) { } function makeResultObject(string $platform, string $relativeUrl, array $metaTags): array { - return [ + $data = [ 'platform' => $platform, 'relativeurl' => $relativeUrl, //'datetime' => date('Y-m-d H:i:s'), @@ -317,13 +362,35 @@ function makeResultObject(string $platform, string $relativeUrl, array $metaTags 'locale' => $metaTags['og:locale'] ?? '', 'type' => $metaTags['og:type'] ?? '', 'image' => $metaTags['og:image'] ?? '', - 'video' => $metaTags['og:video'] ?: $metaTags['og:video:url'] ?: '', + 'video' => $metaTags['og:video'] ?? $metaTags['og:video:url'] ?? '', 'videotype' => $metaTags['og:video:type'] ?? '', - 'title' => $metaTags['og:title'] ?: $metaTags['og:title'] ?: '', + 'htmlvideo' => $metaTags['og:video'] ?? $metaTags['og:video:url'] ?? '', + 'audio' => $metaTags['og:audio'] ?? '', + 'title' => $metaTags['og:title'] ?? $metaTags['og:title'] ?? '', //'author' => $metaTags['og:site_name'] ?? '', - 'description' => $metaTags['og:description'] ?: $metaTags['description'] ?: '', + 'description' => $metaTags['og:description'] ?? $metaTags['description'] ?? '', 'images' => [], ]; + if (inPlatformArray($platform, PLATFORMS_WEBVIDEO) && !$data['video']) { + $data['video'] = makeCanonicalUrl($data); + $data['videotype'] = 'text/html'; + } + if ($data['video'] && $data['videotype'] === 'text/html') { + $proxy = ((inPlatformArray($platform, PLATFORMS_WEBVIDEO) || readProxatoreBool('mediaproxy') || getQueryArray()['proxatore-mediaproxy'] === 'video') ? 'file' : ''); + $data['htmlvideo'] = SCRIPT_NAME . "__{$proxy}proxy__/{$platform}/{$data['video']}"; + if (readProxatoreBool('htmlmedia')) { + $data['video'] = $data['htmlvideo']; + $data['videotype'] = 'video/mp4'; + } + } + // } else if (readProxatoreBool('mediaproxy') || getQueryArray()['proxatore-mediaproxy'] === 'video') { + // $data['htmlvideo'] = SCRIPT_NAME . "__mediaproxy__/{$platform}/{$data['video']}"; + // if (readProxatoreBool('htmlmedia')) { + // $data['video'] = $data['htmlvideo']; + // $data['videotype'] = 'video/mp4'; + // } + // } + return $data; } function makeParamsRelativeUrl(string $platform, string $url): string { @@ -337,9 +404,18 @@ function makeParamsRelativeUrl(string $platform, string $url): string { return rtrim($url, '?&'); } -function readBoolParam(string $key, $default=null, $array=null) { +function getQueryArray(): array { + // switch ($_SERVER['REQUEST_METHOD']) { + // case 'GET': + return $_GET; + // case 'POST': + // return $_POST; + // } +} + +function readBoolParam(string $key, bool|null $default=null, array $array=null) { if (!$array) { - $array = $_GET; + $array = getQueryArray(); } $value = $array[$key] ?? null; if ($value && $value !== '') { @@ -349,11 +425,18 @@ function readBoolParam(string $key, $default=null, $array=null) { } } -function readProxatoreParam(string $key, $array=null) { +function readProxatoreBool(string $key, array $array=null) { return readBoolParam("proxatore-{$key}", OPTIONS_DEFAULTS[$key], $array); // TODO handle domain HTTP referer overrides } +function readProxatoreParam(string $key, array $array=null) { + if (!$array) { + $array = getQueryArray(); + } + return ($array["proxatore-{$key}"] ?? OPTIONS_DEFAULTS[$key] ?? null); +} + function getPageData($platform, $relativeUrl) { if ($platform && $relativeUrl && ($data = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { // if (!in_array($platform, PLATFORMS_TRACKING)) { @@ -374,15 +457,19 @@ function getPageData($platform, $relativeUrl) { } } -function getCobaltVideo(string $url) { - $cobaltData = json_decode(file_get_contents(COBALT_API, false, stream_context_create(['http' => [ - 'header' => [ - 'Accept: application/json', - 'Content-Type: application/json', - ], +function postRequest(string $url, string $body, array $headers=null): string|false { + return file_get_contents($url, false, stream_context_create(['http' => [ + 'header' => $headers, 'method' => 'POST', - 'content' => json_encode(['url' => $url]), - ]]))); + 'content' => $body, + ]])); +} + +function getCobaltVideo(string $url) { + $cobaltData = json_decode(postRequest(COBALT_API, json_encode(['url' => $url]), [ + 'Accept: application/json', + 'Content-Type: application/json', + ])); if ($cobaltData->status === 'redirect' && strpos($cobaltData->url, '.mp4')) { return $cobaltData->url; } @@ -393,9 +480,16 @@ function fetchPageMedia(string $url, array &$result): void { $relativeUrl = $result['relativeurl']; //if ((in_array($platform, PLATFORMS_VIDEO) && !$immediateResult['video']) || !$immediateResult['image']) { if ($api = platformMapGet($platform, PLATFORMS_API)) { - $data = json_decode(fetchContent($api[0] . urlLast($relativeUrl))['body'], true); + $json = null; + if (isset($api['url'])) { + $json = fetchContent($api['url'] . urlLast($relativeUrl))['body']; + } else if (isset($api['id'])) { + $doc = htmldom(fetchContent(makeEmbedUrl($platform, $relativeUrl))['body']); + $json = $doc->getElementById($api['id'])->textContent; + } + $data = json_decode($json, true); $values = []; - foreach ($api[1] as $key => $query) { + foreach ($api['data'] as $key => $query) { $values[$key] = eval("return \$data{$query};"); } $result = array_merge($result, $values); @@ -406,7 +500,7 @@ function fetchPageMedia(string $url, array &$result): void { } $html = fetchContent(makeEmbedUrl($platform, $relativeUrl))['body']; if (!$result['video']) { - $result['video'] = $cobaltVideo ?? getAnyVideoUrl($html); + $result['video'] = $cobaltVideo ?? getAnyVideoUrl($html) ?? ''; } if (!inPlatformArray($platform, PLATFORMS_NOIMAGES) /* !$immediateResult['image'] */) { $result['images'] = getHtmlAttributes($html, 'img', 'src'); @@ -417,12 +511,38 @@ function fetchPageMedia(string $url, array &$result): void { } } -function getYoutubeStreamUrl(string $relativeUrl): string { - if ($video = preg_replace("/[^A-Za-z0-9-_]/", '', escapeshellarg(substr($relativeUrl, -11)))) { - return trim(shell_exec("yt-dlp -g '{$video}'")); +function getWebStreamUrls(string $absoluteUrl, string $options='') { + if (($url = parseAbsoluteUrl($absoluteUrl)) && ($url = preg_replace('/[^A-Za-z0-9-_\/\.]/', '', $url))) { + return explode("\n", trim(shell_exec("yt-dlp {$options} -g 'https://{$url}'"))); } } +function getYoutubeStreamUrl(string $relativeUrl): string { + if ($video = preg_replace('/[^A-Za-z0-9-_]/', '', substr($relativeUrl, -11))) { + return getWebStreamUrls("https://youtu.be/{$video}", '-f mp4')[0]; //trim(shell_exec("yt-dlp -g 'https://youtube.com/watch?v={$video}'")); + } +} + +function ffmpegStream(string $absoluteUrl): void { + if ($urls = getWebStreamUrls($absoluteUrl, '--user-agent "' . USER_AGENT . '"')) { + $inputs = ''; + foreach ($urls as $url) { + $inputs .= " -i '{$url}' "; + } + header('Content-Type: video/mp4'); + passthru("ffmpeg -user_agent '" . USER_AGENT . "' {$inputs} -c:v copy -f ismv -"); + } + die(); +} + +// function ytdlpStream(string $absoluteUrl): void { +// if (($url = parseAbsoluteUrl($absoluteUrl)) && ($url = preg_replace('/[^A-Za-z0-9-_\/\.]/', '', $url))) { +// header('Content-Type: video/mp4'); +// passthru("yt-dlp -f mp4 -o - 'https://{$url}' | ffmpeg -i - -c:v copy -f ismv -"); +// } +// die(); +// } + // TODO: redesign the endpoint names, they're kind of a mess function handleApiRequest(array $segments): void { $api = substr($segments[0], 2, -2); @@ -438,15 +558,25 @@ function handleApiRequest(array $segments): void { header('Location: ' . $url); } } - } else if ($api === 'fileproxy' && $platform === 'youtube') { - header('Content-Type: video/mp4'); - readfile(getYoutubeStreamUrl($relativeUrl)); + } else if ($api === 'fileproxy') { + switch ($platform) { + case 'youtube': + header('Content-Type: video/mp4'); + readfile(getYoutubeStreamUrl($relativeUrl)); + break; + default: + ffmpegStream('https://' . PLATFORMS[$platform][0] . '/' . lstrip($relativeUrl, '/', 3)); + } } else if ($api === 'embed') { header('Location: ' . makeEmbedUrl($platform, $relativeUrl)); } die(); } +function linkifyUrls(string $text): string { + return preg_replace('/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/', '$0', $text); +} + function iframeHtml($result): void { ?>