From c17ab8184a773fc859ac5054a7e4ab168bbe112d Mon Sep 17 00:00:00 2001 From: octt <6083316-octospacc@users.noreply.gitlab.com> Date: Fri, 23 May 2025 12:59:50 +0200 Subject: [PATCH] v0.14, 24/03 --- Proxatore.php | 268 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 180 insertions(+), 88 deletions(-) diff --git a/Proxatore.php b/Proxatore.php index bcff74b..08221c8 100644 --- a/Proxatore.php +++ b/Proxatore.php @@ -1,6 +1,19 @@ false, + 'history' => true, + 'htmlmedia' => false, + 'relativemedia' => false, +]; + +const OPTIONS_OVERRIDES = [ + 'bbs.spacc.eu.org' => [ + 'embedfirst' => true, + ], +]; + const PLATFORMS = [ 'spaccbbs' => ['bbs.spacc.eu.org'], 'bluesky' => ['bsky.app'], @@ -38,13 +51,22 @@ const PLATFORMS_REDIRECTS = [ //'youtu.be' => 'youtube', ]; +const PLATFORMS_API = [ + 'tiktok' => ['https://www.tiktok.com/player/api/v1/items?item_ids=', [ + 'description' => "['items'][0]['desc']", + 'video' => "['items'][0]['video_info']['url_list'][0]", + ]], +]; + const PLATFORMS_FAKE404 = ['telegram']; const PLATFORMS_HACKS = ['bluesky', 'threads', 'twitter', 'x']; const PLATFORMS_ORDERED = ['telegram']; -const PLATFORMS_VIDEO = ['facebook', 'instagram']; +//const PLATFORMS_VIDEO = ['facebook', 'instagram']; + +const PLATFORMS_NOIMAGES = ['altervista.org', 'wordpress.com']; const PLATFORMS_PARAMS = [ 'facebook' => true, @@ -79,6 +101,13 @@ define('APP_SLUG', explode(' ', APPNAME)[1]); define('SCRIPT_NAME', /* $_SERVER['SCRIPT_NAME'] . */ '/'); define('HISTORY_FILE', './' . APP_SLUG . '.history.jsonl'); +function inPlatformArray($platform, $array) { + if (str_contains($platform, '.')) { + $platform = implode('.', array_slice(explode('.', $platform), -2)); + } + return in_array($platform, $array); +} + function lstrip($str, $sub) { return implode($sub, array_slice(explode($sub, $str), 1)); } @@ -95,8 +124,11 @@ function parseAbsoluteUrl($str) { } function redirectTo($url) { - if (!($absolute = parseAbsoluteUrl($url)) && $_GET['proxatore-history'] === 'false' && !(str_contains($url, '?proxatore-history=false') || str_contains($url, '&proxatore-history=false'))) { - $url = $url . (str_contains($url, '?') ? '&' : '?') . 'proxatore-history=false'; + if (!($absolute = parseAbsoluteUrl($url)) && !readProxatoreParam('history') /* && !(str_contains($url, '?proxatore-history=false') || str_contains($url, '&proxatore-history=false')) */) { + parse_str(parse_url($url, PHP_URL_QUERY), $params); + if (!isset($params['proxatore-history'])) { + $url = $url . (str_contains($url, '?') ? '&' : '?') . 'proxatore-history=false'; + } } header('Location: ' . ($absolute ? '' : SCRIPT_NAME) . $url); die(); @@ -106,20 +138,19 @@ function fetchContent($url, $redirects=-1) { $ch = curl_init(); //$useragent = 'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0'; //$useragent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0'; - $useragent = 'curl/' . curl_version()['version']; + $useragent = 'curl/' . curl_version()['version']; // format the UA like curl CLI otherwise some sites can't behave curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_MAXREDIRS, $redirects); curl_setopt($ch, CURLOPT_USERAGENT, $useragent); - $body = curl_exec($ch); - $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - return [ - 'body' => $body, - 'code' => $code, + $data = [ + 'body' => curl_exec($ch), + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), 'url' => curl_getinfo($ch, CURLINFO_REDIRECT_URL), ]; + curl_close($ch); + return $data; } function makeCanonicalUrl($item) { @@ -159,6 +190,17 @@ function makeScrapeUrl($platform, $relativeUrl) { return 'https://' . ((in_array($platform, PLATFORMS_HACKS) ? (PLATFORMS_PROXIES[$platform][0] ?: PLATFORMS[$platform][0]) : PLATFORMS[$platform][0]) ?: $platform) . '/' . $relativeUrl; } +function getHtmlAttributes($doc, $tag, $attr) { + if (is_string($doc)) { + $doc = htmldom($doc); + } + $list = []; + foreach ($doc->getElementsByTagName($tag) as $el) { + $list[] = $el->getAttribute($attr); + } + return $list; +} + function parseMetaTags($doc) { $tags = []; foreach ($doc->getElementsByTagName('meta') as $meta) { @@ -197,7 +239,7 @@ function saveHistory($entry) { } $history = loadHistory(); $history = array_filter($history, function ($item) use ($entry) { - return $item['platform'] !== $entry['platform'] || $item['relativeurl'] !== $entry['relativeurl']; + return (($item['platform'] !== $entry['platform']) || ($item['relativeurl'] !== $entry['relativeurl'])); }); $history[] = $entry; $lines = array_map(fn($item) => json_encode($item, JSON_UNESCAPED_SLASHES), $history); @@ -239,14 +281,16 @@ function htmldom($body) { return $doc; } -function getHtmlVideoUrl($html, $vidpos) { - $endpos = strpos($html, '"', $vidpos); - $vidstr = substr($html, 0, $endpos); - $startpos = $endpos - strpos(strrev($vidstr), '"'); - $vidstr = substr($html, $startpos, $endpos-$startpos+1); - $vidstr = html_entity_decode($vidstr); - $vidstr = json_decode('"' . json_decode('"' . $vidstr . '"')) ?: json_decode('"' . json_decode('"' . $vidstr) . '"'); - return $vidstr; +function getAnyVideoUrl($txt) { + if ($vidpos = (strpos($txt, '.mp4?') ?? strpos($txt, '.mp4'))) { + $endpos = strpos($txt, '"', $vidpos); + $vidstr = substr($txt, 0, $endpos); + $startpos = $endpos - strpos(strrev($vidstr), '"'); + $vidstr = substr($txt, $startpos, $endpos-$startpos+1); + $vidstr = html_entity_decode($vidstr); + $vidstr = json_decode('"' . json_decode('"' . $vidstr . '"')) ?: json_decode('"' . json_decode('"' . $vidstr) . '"'); + return $vidstr; + } } function makeResultObject($platform, $relativeUrl, $metaTags) { @@ -278,14 +322,77 @@ function makeParamsRelativeUrl($platform, $url) { return rtrim($url, '?&'); } +function readBoolParam($key, $default=null, $array=null) { + if (!$array) { + $array = $_GET; + } + $value = $array[$key]; + if (isset($value) && $value !== '') { + return filter_var($value, FILTER_VALIDATE_BOOLEAN); + } else { + return $default; + } +} + +function readProxatoreParam($key, $array=null) { + return readBoolParam("proxatore-{$key}", OPTIONS_DEFAULTS[$key], $array); + // TODO handle domain HTTP referer overrides +} + +function getPageData($platform, $relativeUrl) { + if ($platform && $relativeUrl && ($data = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { + // if (!in_array($platform, PLATFORMS_TRACKING)) { + // $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); + // } + if (isset(PLATFORMS_PARAMS[$platform])) { + if (PLATFORMS_PARAMS[$platform] !== true) { + $relativeUrl = makeParamsRelativeUrl($platform, $relativeUrl); + } + } else { + $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); + } + $data['doc'] = htmldom($data['body']); + $data['result'] = makeResultObject($platform, $relativeUrl, parseMetaTags($data['doc'])); + return $data; + } +} + +function handleApiRequest($segments) { + $api = substr($segments[0], 2, -2); + $platform = $segments[1]; + $relativeUrl = implode('/', array_slice($segments, 2)); + if (($api === 'proxy' || $api === 'media')) { + if ($platform === 'youtube' && ($video = preg_replace("/[^A-Za-z0-9-_]/", '', escapeshellarg(substr($relativeUrl, -11))))) { + header('Location: ' . shell_exec("yt-dlp -g '{$video}'")); + } else if ($api === 'media' && end($segments) === '0') { + $relativeUrl = substr($relativeUrl, 0, -2); + $data = getPageData($platform, $relativeUrl)['result']; + if ($url = ($data['video'] ?: $data['image'])) { + header('Location: ' . $url); + } + } + } else if ($api === 'embed') { + header('Location: ' . makeEmbedUrl($platform, $relativeUrl)); + } + die(); +} + +function iframeHtml($result) { ?> + +