From 3245669f4b22c7c475ceccd8f5b703c727303050 Mon Sep 17 00:00:00 2001 From: octospacc Date: Mon, 24 Mar 2025 01:09:54 +0100 Subject: [PATCH] Auto-Backup $'Mon Mar 24 2025 00:31:42 GMT+0100 (Central European Standard Time)' --- MemosYacyBridgeCreate.php | 4 +- Proxatore.php | 474 +++++++++++++++++++++++++++----------- 2 files changed, 344 insertions(+), 134 deletions(-) diff --git a/MemosYacyBridgeCreate.php b/MemosYacyBridgeCreate.php index 4dbcf40..4c33e06 100644 --- a/MemosYacyBridgeCreate.php +++ b/MemosYacyBridgeCreate.php @@ -26,9 +26,9 @@ if ( php_sapi_name() === 'cli' && $argv[1] ) { return; } -$headers = "X-NoProxy: 1\n"; +$headers = "X-NoProxy: 1\ncontent-type: application/grpc-web+proto\n"; foreach ( getallheaders() as $key => $value ) { - if ( !$value || $key === 'Accept-Encoding' ) { + if ( !$value || $key === 'Accept-Encoding' || $key === 'Content-Type' ) { continue; } if ( $key === 'Host' ) { diff --git a/Proxatore.php b/Proxatore.php index 30d2bd3..08221c8 100644 --- a/Proxatore.php +++ b/Proxatore.php @@ -1,14 +1,29 @@ false, + 'history' => true, + 'htmlmedia' => false, + 'relativemedia' => false, +]; + +const OPTIONS_OVERRIDES = [ + 'bbs.spacc.eu.org' => [ + 'embedfirst' => true, + ], +]; + const PLATFORMS = [ 'spaccbbs' => ['bbs.spacc.eu.org'], + 'bluesky' => ['bsky.app'], 'facebook' => ['facebook.com', 'm.facebook.com'], 'instagram' => ['instagram.com'], //'juxt' => ['juxt.pretendo.network'], 'reddit' => ['old.reddit.com', 'reddit.com'], 'spotify' => ['open.spotify.com'], 'telegram' => ['t.me', 'telegram.me'], + 'threads' => ['threads.net'], 'tiktok' => ['tiktok.com'], 'twitter' => ['twitter.com'], 'x' => ['x.com'], @@ -16,12 +31,16 @@ const PLATFORMS = [ 'youtube' => ['youtube.com', 'm.youtube.com'], ]; +const PLATFORMS_USERSITES = ['altervista.org', 'blogspot.com', 'wordpress.com']; + const PLATFORMS_ALIASES = [ 'x' => 'twitter', ]; const PLATFORMS_PROXIES = [ + 'bluesky' => ['fxbsky.app'], 'instagram' => ['ddinstagram.com', 'd.ddinstagram.com'], + 'threads' => ['vxthreads.net'], 'tiktok' => ['vxtiktok.com'], 'twitter' => ['fxtwitter.com', 'vxtwitter.com', 'fixvx.com'], 'x' => ['fixupx.com', 'girlcockx.com', 'stupidpenisx.com'], @@ -32,11 +51,22 @@ const PLATFORMS_REDIRECTS = [ //'youtu.be' => 'youtube', ]; -const PLATFORMS_HACKS = ['twitter', 'x']; +const PLATFORMS_API = [ + 'tiktok' => ['https://www.tiktok.com/player/api/v1/items?item_ids=', [ + 'description' => "['items'][0]['desc']", + 'video' => "['items'][0]['video_info']['url_list'][0]", + ]], +]; + +const PLATFORMS_FAKE404 = ['telegram']; + +const PLATFORMS_HACKS = ['bluesky', 'threads', 'twitter', 'x']; const PLATFORMS_ORDERED = ['telegram']; -const PLATFORMS_VIDEO = ['facebook', 'instagram']; +//const PLATFORMS_VIDEO = ['facebook', 'instagram']; + +const PLATFORMS_NOIMAGES = ['altervista.org', 'wordpress.com']; const PLATFORMS_PARAMS = [ 'facebook' => true, @@ -67,8 +97,16 @@ define('EMBEDS_PREFIXES_FULL', [ 'facebook' => 'www.facebook.com/plugins/post.php?href=' . urlencode('https://www.facebook.com/'), ]); +define('APP_SLUG', explode(' ', APPNAME)[1]); define('SCRIPT_NAME', /* $_SERVER['SCRIPT_NAME'] . */ '/'); -define('HISTORY_FILE', './' . $_SERVER['SCRIPT_NAME'] . '.history.jsonl'); +define('HISTORY_FILE', './' . APP_SLUG . '.history.jsonl'); + +function inPlatformArray($platform, $array) { + if (str_contains($platform, '.')) { + $platform = implode('.', array_slice(explode('.', $platform), -2)); + } + return in_array($platform, $array); +} function lstrip($str, $sub) { return implode($sub, array_slice(explode($sub, $str), 1)); @@ -85,8 +123,14 @@ function parseAbsoluteUrl($str) { } } -function redirectTo($internalUrl) { - header('Location: ' . SCRIPT_NAME . $internalUrl); +function redirectTo($url) { + if (!($absolute = parseAbsoluteUrl($url)) && !readProxatoreParam('history') /* && !(str_contains($url, '?proxatore-history=false') || str_contains($url, '&proxatore-history=false')) */) { + parse_str(parse_url($url, PHP_URL_QUERY), $params); + if (!isset($params['proxatore-history'])) { + $url = $url . (str_contains($url, '?') ? '&' : '?') . 'proxatore-history=false'; + } + } + header('Location: ' . ($absolute ? '' : SCRIPT_NAME) . $url); die(); } @@ -94,20 +138,19 @@ function fetchContent($url, $redirects=-1) { $ch = curl_init(); //$useragent = 'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0'; //$useragent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0'; - $useragent = 'curl/' . curl_version()['version']; + $useragent = 'curl/' . curl_version()['version']; // format the UA like curl CLI otherwise some sites can't behave curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_MAXREDIRS, $redirects); curl_setopt($ch, CURLOPT_USERAGENT, $useragent); - $body = curl_exec($ch); - $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - return [ - 'body' => $body, - 'code' => $code, + $data = [ + 'body' => curl_exec($ch), + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), 'url' => curl_getinfo($ch, CURLINFO_REDIRECT_URL), ]; + curl_close($ch); + return $data; } function makeCanonicalUrl($item) { @@ -147,14 +190,25 @@ function makeScrapeUrl($platform, $relativeUrl) { return 'https://' . ((in_array($platform, PLATFORMS_HACKS) ? (PLATFORMS_PROXIES[$platform][0] ?: PLATFORMS[$platform][0]) : PLATFORMS[$platform][0]) ?: $platform) . '/' . $relativeUrl; } +function getHtmlAttributes($doc, $tag, $attr) { + if (is_string($doc)) { + $doc = htmldom($doc); + } + $list = []; + foreach ($doc->getElementsByTagName($tag) as $el) { + $list[] = $el->getAttribute($attr); + } + return $list; +} + function parseMetaTags($doc) { - $metaTags = []; + $tags = []; foreach ($doc->getElementsByTagName('meta') as $meta) { if ($meta->hasAttribute('name') || $meta->hasAttribute('property')) { - $metaTags[$meta->getAttribute('name') ?: $meta->getAttribute('property')] = $meta->getAttribute('content'); + $tags[$meta->getAttribute('name') ?: $meta->getAttribute('property')] = $meta->getAttribute('content'); } } - return $metaTags; + return $tags; } function loadHistory() { @@ -169,9 +223,23 @@ function loadHistory() { } function saveHistory($entry) { + if (in_array($entry['platform'], PLATFORMS_FAKE404)) { + $history = searchExactHistory($entry['platform'], implode('/', array_slice(explode('/', $entry['relativeurl']), -1))); + if (sizeof($history)) { + unset($history[0]['relativeurl']); + unset($entry['relativeurl']); + if (json_encode($history[0], JSON_UNESCAPED_SLASHES) === json_encode($entry, JSON_UNESCAPED_SLASHES)) { + return; + } else { + // TODO update cache of main page + } + } else { + // TODO update cache of main page + } + } $history = loadHistory(); $history = array_filter($history, function ($item) use ($entry) { - return $item['platform'] !== $entry['platform'] || $item['relativeurl'] !== $entry['relativeurl']; + return (($item['platform'] !== $entry['platform']) || ($item['relativeurl'] !== $entry['relativeurl'])); }); $history[] = $entry; $lines = array_map(fn($item) => json_encode($item, JSON_UNESCAPED_SLASHES), $history); @@ -180,23 +248,151 @@ function saveHistory($entry) { function searchHistory($keyword) { $results = []; - $history = loadHistory(); - foreach ($history as $entry) { + $fake404 = []; + foreach (loadHistory() as $entry) { if (stripos(json_encode($entry, JSON_UNESCAPED_SLASHES), $keyword) !== false) { + if (in_array($entry['platform'], PLATFORMS_FAKE404)) { + $entry2 = $entry; + unset($entry2['relativeurl']); + foreach ($fake404 as $item) { + if (json_encode($entry2, JSON_UNESCAPED_SLASHES) === json_encode($item, JSON_UNESCAPED_SLASHES)) { + goto skip; + } + } + $fake404[] = $entry2; + } $results[] = $entry; + skip: } } return $results; } -$path = $_SERVER['REQUEST_URI'];//parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH); +function searchExactHistory($platform, $relativeUrl) { + return searchHistory(json_encode([ + 'platform' => $platform, + 'relativeurl' => $relativeUrl, + ], JSON_UNESCAPED_SLASHES)); +} + +function htmldom($body) { + $doc = new DOMDocument(); + $doc->loadHTML(mb_convert_encoding($body, 'HTML-ENTITIES', 'UTF-8')); + return $doc; +} + +function getAnyVideoUrl($txt) { + if ($vidpos = (strpos($txt, '.mp4?') ?? strpos($txt, '.mp4'))) { + $endpos = strpos($txt, '"', $vidpos); + $vidstr = substr($txt, 0, $endpos); + $startpos = $endpos - strpos(strrev($vidstr), '"'); + $vidstr = substr($txt, $startpos, $endpos-$startpos+1); + $vidstr = html_entity_decode($vidstr); + $vidstr = json_decode('"' . json_decode('"' . $vidstr . '"')) ?: json_decode('"' . json_decode('"' . $vidstr) . '"'); + return $vidstr; + } +} + +function makeResultObject($platform, $relativeUrl, $metaTags) { + return [ + 'platform' => $platform, + 'relativeurl' => $relativeUrl, + //'datetime' => date('Y-m-d H:i:s'), + //'request_time' => time(), + 'locale' => $metaTags['og:locale'] ?? '', + 'type' => $metaTags['og:type'] ?? '', + 'image' => $metaTags['og:image'] ?? '', + 'video' => $metaTags['og:video'] ?: $metaTags['og:video:url'] ?: '', + 'videotype' => $metaTags['og:video:type'] ?? '', + 'title' => $metaTags['og:title'] ?: $metaTags['og:title'] ?: '', + //'author' => $metaTags['og:site_name'] ?? '', + 'description' => $metaTags['og:description'] ?: $metaTags['description'] ?: '', + 'images' => [], + ]; +} + +function makeParamsRelativeUrl($platform, $url) { + parse_str(parse_url($url, PHP_URL_QUERY), $params); + $url = parse_url($url, PHP_URL_PATH) . '?'; + foreach ($params as $key => $value) { + if (in_array($key, PLATFORMS_PARAMS[$platform])) { + $url .= "{$key}={$value}&"; + } + } + return rtrim($url, '?&'); +} + +function readBoolParam($key, $default=null, $array=null) { + if (!$array) { + $array = $_GET; + } + $value = $array[$key]; + if (isset($value) && $value !== '') { + return filter_var($value, FILTER_VALIDATE_BOOLEAN); + } else { + return $default; + } +} + +function readProxatoreParam($key, $array=null) { + return readBoolParam("proxatore-{$key}", OPTIONS_DEFAULTS[$key], $array); + // TODO handle domain HTTP referer overrides +} + +function getPageData($platform, $relativeUrl) { + if ($platform && $relativeUrl && ($data = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { + // if (!in_array($platform, PLATFORMS_TRACKING)) { + // $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); + // } + if (isset(PLATFORMS_PARAMS[$platform])) { + if (PLATFORMS_PARAMS[$platform] !== true) { + $relativeUrl = makeParamsRelativeUrl($platform, $relativeUrl); + } + } else { + $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); + } + $data['doc'] = htmldom($data['body']); + $data['result'] = makeResultObject($platform, $relativeUrl, parseMetaTags($data['doc'])); + return $data; + } +} + +function handleApiRequest($segments) { + $api = substr($segments[0], 2, -2); + $platform = $segments[1]; + $relativeUrl = implode('/', array_slice($segments, 2)); + if (($api === 'proxy' || $api === 'media')) { + if ($platform === 'youtube' && ($video = preg_replace("/[^A-Za-z0-9-_]/", '', escapeshellarg(substr($relativeUrl, -11))))) { + header('Location: ' . shell_exec("yt-dlp -g '{$video}'")); + } else if ($api === 'media' && end($segments) === '0') { + $relativeUrl = substr($relativeUrl, 0, -2); + $data = getPageData($platform, $relativeUrl)['result']; + if ($url = ($data['video'] ?: $data['image'])) { + header('Location: ' . $url); + } + } + } else if ($api === 'embed') { + header('Location: ' . makeEmbedUrl($platform, $relativeUrl)); + } + die(); +} + +function iframeHtml($result) { ?> + +
+ ⬅️ Previous + ➡️ Next +
+ + + $domains) { if (in_array($upstream, $domains) || in_array(lstrip($upstream, 'www.'), $domains)) { - redirectTo($platform . '/' . $relativeUrl); + return redirectTo($platform . '/' . $relativeUrl); } } unset($platform); @@ -242,92 +434,70 @@ if (isset($_GET['proxatore-search']) && ($search = $_GET['proxatore-search']) != if (!$platform && isset(PLATFORMS_REDIRECTS[$upstream])) { // TODO: only strip query params for platforms that don't need them - $relativeUrl = trim(parse_url(fetchContent("$upstream/$relativeUrl", 1)['url'], PHP_URL_PATH), '/'); + $relativeUrl = trim(parse_url(fetchContent("{$upstream}/{$relativeUrl}", 1)['url'], PHP_URL_PATH), '/'); $platform = PLATFORMS_REDIRECTS[$upstream]; - redirectTo($platform . '/' . $relativeUrl); - } else if (!$platform && (str_ends_with($upstream, '.wordpress.com') || str_ends_with($upstream, '.blogspot.com'))) { - $platform = $upstream; + return redirectTo("{$platform}/{$relativeUrl}"); + } else if (!$platform) { + foreach (PLATFORMS_USERSITES as $domain) { + if (str_ends_with($upstream, ".{$domain}")) { + $platform = $upstream; + break; + } + } } - if ($relativeUrl && $platform && ($content = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { - http_response_code($content['code']); - // if (!in_array($platform, PLATFORMS_TRACKING)) { - // $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); - // } - if (isset(PLATFORMS_PARAMS[$platform])) { - if (PLATFORMS_PARAMS[$platform] !== true) { - parse_str(parse_url($relativeUrl, PHP_URL_QUERY), $params); - $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH) . '?'; - foreach ($params as $key => $value) { - if (in_array($key, PLATFORMS_PARAMS[$platform])) { - $relativeUrl .= "{$key}={$value}&"; - } - } - } - } else { - $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); + //if ($relativeUrl && $platform && ($content = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { + if ($data = getPageData($platform, $relativeUrl)) { + http_response_code($data['code']); + $immediateResult = $data['result']; + $relativeUrl = $immediateResult['relativeurl']; + if ($immediateResult['video'] && $immediateResult['videotype'] === 'text/html' && readProxatoreParam('htmlmedia')) { + $immediateResult['video'] = SCRIPT_NAME . "__proxy__/{$platform}/{$immediateResult['video']}"; + $immediateResult['videotype'] = 'video/mp4'; } - $doc = new DOMDocument(); - $doc->loadHTML($content['body']); - $metaTags = parseMetaTags($doc); - $immediateResult = [ - 'platform' => $platform, - 'relativeurl' => $relativeUrl, - //'datetime' => date('Y-m-d H:i:s'), - //'request_time' => time(), - 'locale' => $metaTags['og:locale'] ?? '', - 'type' => $metaTags['og:type'] ?? '', - 'image' => $metaTags['og:image'] ?? '', - 'video' => $metaTags['og:video'] ?: $metaTags['og:video:url'] ?: '', - 'videotype' => $metaTags['og:video:type'] ?? '', - 'title' => $metaTags['og:title'] ?: $metaTags['og:title'] ?: '', - //'author' => $metaTags['og:site_name'] ?? '', - 'description' => $metaTags['og:description'] ?: $metaTags['description'] ?: '', - 'images' => [], - ]; //if ((in_array($platform, PLATFORMS_VIDEO) && !$immediateResult['video']) || !$immediateResult['image']) { - $html = fetchContent(makeEmbedUrl($platform, $relativeUrl))['body']; - if (!$immediateResult['video'] && ($vidpos = (strpos($html, '.mp4?') ?? strpos($html, '.mp4')))) { - //$startpos = 0;//strpos(strrev(substr($html, 0, $vidpos)), '"'); - $endpos = strpos($html, '"', $vidpos); //strpos(substr($html, $vidpos), '"'); - $vidstr = substr($html, 0, $endpos); - //echo $vidstr; - $startpos = $endpos - strpos(strrev($vidstr), '"'); - $vidstr = substr($html, $startpos, $endpos-$startpos+1); - //echo '|' . $vidpos . '|' . $startpos . '|' . $endpos; //substr($html, $startpos, $endpos); - $vidstr = html_entity_decode($vidstr); - //$vidstr = json_decode('"' . json_decode('"' . ($vidstr) . '"') . ''); - $vidstr = json_decode('"' . json_decode('"' . $vidstr . '"')) ?: json_decode('"' . json_decode('"' . $vidstr) . '"'); - //$vidstr = json_decode('"' . $vidstr . '"'); - //echo $vidstr; - $immediateResult['video'] = $vidstr; - //echo '|'.$startpos.'|'.$endpos.'|'; + if (isset(PLATFORMS_API[$platform])) { + $api = PLATFORMS_API[$platform]; + $data = json_decode(fetchContent($api[0] . urlLast($relativeUrl))['body'], true); + $values = []; + foreach ($api[1] as $key => $query) { + $values[$key] = eval("return \$data{$query};"); } - //if (!$immediateResult['image']) { - $doc->loadHTML($html); - foreach ($doc->getElementsByTagName('img') as $img) { - array_push($immediateResult['images'], $img->getAttribute('src')); - } - if (sizeof($immediateResult['images'])) { - //$immediateResult['image'] = $imgs[0]; - } - //} + $immediateResult = array_merge($immediateResult, $values); + } else { + $html = fetchContent(makeEmbedUrl($platform, $relativeUrl))['body']; + if (!$immediateResult['video']) { + $immediateResult['video'] = getAnyVideoUrl($html); + } + if (!inPlatformArray($platform, PLATFORMS_NOIMAGES) /* !$immediateResult['image'] */) { + $immediateResult['images'] = getHtmlAttributes($html, 'img', 'src'); + // if (sizeof($immediateResult['images'])) { + // //$immediateResult['image'] = $imgs[0]; + // } + } + } //} //if ($immediateResult['title'] || $immediateResult['description']) { // saveHistory($immediateResult); //} else if ($content['code'] >= 400) { - $searchResults = searchHistory(json_encode([ - 'platform' => $platform, - 'relativeurl' => $relativeUrl, - ], JSON_UNESCAPED_SLASHES));//('"platform":"' . $platform . '","relativeurl":"' . $relativeUrl . '"'); + $searchResults = searchExactHistory($platform, $relativeUrl); if (sizeof($searchResults)) { $immediateResult = $searchResults[0]; } - } else { + } else if (readProxatoreParam('history')) { saveHistory($immediateResult); } $immediateResult['description'] = preg_replace('/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/', '$0', $immediateResult['description']); + if (readProxatoreParam('relativemedia')) { + $count = 0; + foreach (['video', 'image'] as $type) { + if ($immediateResult[$type]) { + $immediateResult[$type] = SCRIPT_NAME . "__media__/{$platform}/{$immediateResult['relativeurl']}/{$count}"; + $count++; + } + } + } $searchResults = [$immediateResult]; } else { http_response_code(404); @@ -444,14 +614,17 @@ h2 { width: 49%; /*padding: 1em;*/ } +.img { + display: inline-block; +} img, video { padding: 1em; } img[src=""], video[src=""] { display: none; } -img + img, -video:not(video[src=""]) + img { +.img + .img, +video:not(video[src=""]) + .img { max-width: 45% !important; } .history-item strong { @@ -505,6 +678,9 @@ video:not(video[src=""]) + img { .search-bar button:hover { background-color: #155dbb; } +ul.platforms a { + text-decoration: none; +} @media (max-width: 600px) { .search-bar input { width: 100%; @@ -533,26 +709,69 @@ video:not(video[src=""]) + img { display: block; } } +/* @media (prefers-color-scheme: dark) { + body { + background-color: #444; + color: white; + } + .container { + background-color: #222; + } + .history-item strong { + color: white; + } + .history-item:hover { + background-color: #333; + } + a { + color:rgb(85, 155, 247); + } +} */
-

- Supported Platforms:

'; + // echo '
Query API
    + //
  • /?proxatore-search={search term} — Make a full-text search or load a given URL
  • + //
  • ...?proxatore-history={true,false} — Specify if a given query must be stored in the global search history (default: true)
  • + //
'; + echo '

Made with 🕸️ and 🧨 by OctoSpacc. + Source Code: Proxatore.php. +

'; } ?> + + +

Search results:

+ +

Nothing was found.

+ +
">

- +

- + - - - + +

- +

@@ -585,15 +803,7 @@ video:not(video[src=""]) + img {

- - -
- ⬅️ Previous - ➡️ Next -
- - - +