diff --git a/MemosYacyBridgeCreate.php b/MemosYacyBridgeCreate.php index 4dbcf40..4c33e06 100644 --- a/MemosYacyBridgeCreate.php +++ b/MemosYacyBridgeCreate.php @@ -26,9 +26,9 @@ if ( php_sapi_name() === 'cli' && $argv[1] ) { return; } -$headers = "X-NoProxy: 1\n"; +$headers = "X-NoProxy: 1\ncontent-type: application/grpc-web+proto\n"; foreach ( getallheaders() as $key => $value ) { - if ( !$value || $key === 'Accept-Encoding' ) { + if ( !$value || $key === 'Accept-Encoding' || $key === 'Content-Type' ) { continue; } if ( $key === 'Host' ) { diff --git a/Proxatore.php b/Proxatore.php index 30d2bd3..08221c8 100644 --- a/Proxatore.php +++ b/Proxatore.php @@ -1,14 +1,29 @@ false, + 'history' => true, + 'htmlmedia' => false, + 'relativemedia' => false, +]; + +const OPTIONS_OVERRIDES = [ + 'bbs.spacc.eu.org' => [ + 'embedfirst' => true, + ], +]; + const PLATFORMS = [ 'spaccbbs' => ['bbs.spacc.eu.org'], + 'bluesky' => ['bsky.app'], 'facebook' => ['facebook.com', 'm.facebook.com'], 'instagram' => ['instagram.com'], //'juxt' => ['juxt.pretendo.network'], 'reddit' => ['old.reddit.com', 'reddit.com'], 'spotify' => ['open.spotify.com'], 'telegram' => ['t.me', 'telegram.me'], + 'threads' => ['threads.net'], 'tiktok' => ['tiktok.com'], 'twitter' => ['twitter.com'], 'x' => ['x.com'], @@ -16,12 +31,16 @@ const PLATFORMS = [ 'youtube' => ['youtube.com', 'm.youtube.com'], ]; +const PLATFORMS_USERSITES = ['altervista.org', 'blogspot.com', 'wordpress.com']; + const PLATFORMS_ALIASES = [ 'x' => 'twitter', ]; const PLATFORMS_PROXIES = [ + 'bluesky' => ['fxbsky.app'], 'instagram' => ['ddinstagram.com', 'd.ddinstagram.com'], + 'threads' => ['vxthreads.net'], 'tiktok' => ['vxtiktok.com'], 'twitter' => ['fxtwitter.com', 'vxtwitter.com', 'fixvx.com'], 'x' => ['fixupx.com', 'girlcockx.com', 'stupidpenisx.com'], @@ -32,11 +51,22 @@ const PLATFORMS_REDIRECTS = [ //'youtu.be' => 'youtube', ]; -const PLATFORMS_HACKS = ['twitter', 'x']; +const PLATFORMS_API = [ + 'tiktok' => ['https://www.tiktok.com/player/api/v1/items?item_ids=', [ + 'description' => "['items'][0]['desc']", + 'video' => "['items'][0]['video_info']['url_list'][0]", + ]], +]; + +const PLATFORMS_FAKE404 = ['telegram']; + +const PLATFORMS_HACKS = ['bluesky', 'threads', 'twitter', 'x']; const PLATFORMS_ORDERED = ['telegram']; -const PLATFORMS_VIDEO = ['facebook', 'instagram']; +//const PLATFORMS_VIDEO = ['facebook', 'instagram']; + +const PLATFORMS_NOIMAGES = ['altervista.org', 'wordpress.com']; const PLATFORMS_PARAMS = [ 'facebook' => true, @@ -67,8 +97,16 @@ define('EMBEDS_PREFIXES_FULL', [ 'facebook' => 'www.facebook.com/plugins/post.php?href=' . urlencode('https://www.facebook.com/'), ]); +define('APP_SLUG', explode(' ', APPNAME)[1]); define('SCRIPT_NAME', /* $_SERVER['SCRIPT_NAME'] . */ '/'); -define('HISTORY_FILE', './' . $_SERVER['SCRIPT_NAME'] . '.history.jsonl'); +define('HISTORY_FILE', './' . APP_SLUG . '.history.jsonl'); + +function inPlatformArray($platform, $array) { + if (str_contains($platform, '.')) { + $platform = implode('.', array_slice(explode('.', $platform), -2)); + } + return in_array($platform, $array); +} function lstrip($str, $sub) { return implode($sub, array_slice(explode($sub, $str), 1)); @@ -85,8 +123,14 @@ function parseAbsoluteUrl($str) { } } -function redirectTo($internalUrl) { - header('Location: ' . SCRIPT_NAME . $internalUrl); +function redirectTo($url) { + if (!($absolute = parseAbsoluteUrl($url)) && !readProxatoreParam('history') /* && !(str_contains($url, '?proxatore-history=false') || str_contains($url, '&proxatore-history=false')) */) { + parse_str(parse_url($url, PHP_URL_QUERY), $params); + if (!isset($params['proxatore-history'])) { + $url = $url . (str_contains($url, '?') ? '&' : '?') . 'proxatore-history=false'; + } + } + header('Location: ' . ($absolute ? '' : SCRIPT_NAME) . $url); die(); } @@ -94,20 +138,19 @@ function fetchContent($url, $redirects=-1) { $ch = curl_init(); //$useragent = 'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0'; //$useragent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0'; - $useragent = 'curl/' . curl_version()['version']; + $useragent = 'curl/' . curl_version()['version']; // format the UA like curl CLI otherwise some sites can't behave curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); curl_setopt($ch, CURLOPT_MAXREDIRS, $redirects); curl_setopt($ch, CURLOPT_USERAGENT, $useragent); - $body = curl_exec($ch); - $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - return [ - 'body' => $body, - 'code' => $code, + $data = [ + 'body' => curl_exec($ch), + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), 'url' => curl_getinfo($ch, CURLINFO_REDIRECT_URL), ]; + curl_close($ch); + return $data; } function makeCanonicalUrl($item) { @@ -147,14 +190,25 @@ function makeScrapeUrl($platform, $relativeUrl) { return 'https://' . ((in_array($platform, PLATFORMS_HACKS) ? (PLATFORMS_PROXIES[$platform][0] ?: PLATFORMS[$platform][0]) : PLATFORMS[$platform][0]) ?: $platform) . '/' . $relativeUrl; } +function getHtmlAttributes($doc, $tag, $attr) { + if (is_string($doc)) { + $doc = htmldom($doc); + } + $list = []; + foreach ($doc->getElementsByTagName($tag) as $el) { + $list[] = $el->getAttribute($attr); + } + return $list; +} + function parseMetaTags($doc) { - $metaTags = []; + $tags = []; foreach ($doc->getElementsByTagName('meta') as $meta) { if ($meta->hasAttribute('name') || $meta->hasAttribute('property')) { - $metaTags[$meta->getAttribute('name') ?: $meta->getAttribute('property')] = $meta->getAttribute('content'); + $tags[$meta->getAttribute('name') ?: $meta->getAttribute('property')] = $meta->getAttribute('content'); } } - return $metaTags; + return $tags; } function loadHistory() { @@ -169,9 +223,23 @@ function loadHistory() { } function saveHistory($entry) { + if (in_array($entry['platform'], PLATFORMS_FAKE404)) { + $history = searchExactHistory($entry['platform'], implode('/', array_slice(explode('/', $entry['relativeurl']), -1))); + if (sizeof($history)) { + unset($history[0]['relativeurl']); + unset($entry['relativeurl']); + if (json_encode($history[0], JSON_UNESCAPED_SLASHES) === json_encode($entry, JSON_UNESCAPED_SLASHES)) { + return; + } else { + // TODO update cache of main page + } + } else { + // TODO update cache of main page + } + } $history = loadHistory(); $history = array_filter($history, function ($item) use ($entry) { - return $item['platform'] !== $entry['platform'] || $item['relativeurl'] !== $entry['relativeurl']; + return (($item['platform'] !== $entry['platform']) || ($item['relativeurl'] !== $entry['relativeurl'])); }); $history[] = $entry; $lines = array_map(fn($item) => json_encode($item, JSON_UNESCAPED_SLASHES), $history); @@ -180,23 +248,151 @@ function saveHistory($entry) { function searchHistory($keyword) { $results = []; - $history = loadHistory(); - foreach ($history as $entry) { + $fake404 = []; + foreach (loadHistory() as $entry) { if (stripos(json_encode($entry, JSON_UNESCAPED_SLASHES), $keyword) !== false) { + if (in_array($entry['platform'], PLATFORMS_FAKE404)) { + $entry2 = $entry; + unset($entry2['relativeurl']); + foreach ($fake404 as $item) { + if (json_encode($entry2, JSON_UNESCAPED_SLASHES) === json_encode($item, JSON_UNESCAPED_SLASHES)) { + goto skip; + } + } + $fake404[] = $entry2; + } $results[] = $entry; + skip: } } return $results; } -$path = $_SERVER['REQUEST_URI'];//parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH); +function searchExactHistory($platform, $relativeUrl) { + return searchHistory(json_encode([ + 'platform' => $platform, + 'relativeurl' => $relativeUrl, + ], JSON_UNESCAPED_SLASHES)); +} + +function htmldom($body) { + $doc = new DOMDocument(); + $doc->loadHTML(mb_convert_encoding($body, 'HTML-ENTITIES', 'UTF-8')); + return $doc; +} + +function getAnyVideoUrl($txt) { + if ($vidpos = (strpos($txt, '.mp4?') ?? strpos($txt, '.mp4'))) { + $endpos = strpos($txt, '"', $vidpos); + $vidstr = substr($txt, 0, $endpos); + $startpos = $endpos - strpos(strrev($vidstr), '"'); + $vidstr = substr($txt, $startpos, $endpos-$startpos+1); + $vidstr = html_entity_decode($vidstr); + $vidstr = json_decode('"' . json_decode('"' . $vidstr . '"')) ?: json_decode('"' . json_decode('"' . $vidstr) . '"'); + return $vidstr; + } +} + +function makeResultObject($platform, $relativeUrl, $metaTags) { + return [ + 'platform' => $platform, + 'relativeurl' => $relativeUrl, + //'datetime' => date('Y-m-d H:i:s'), + //'request_time' => time(), + 'locale' => $metaTags['og:locale'] ?? '', + 'type' => $metaTags['og:type'] ?? '', + 'image' => $metaTags['og:image'] ?? '', + 'video' => $metaTags['og:video'] ?: $metaTags['og:video:url'] ?: '', + 'videotype' => $metaTags['og:video:type'] ?? '', + 'title' => $metaTags['og:title'] ?: $metaTags['og:title'] ?: '', + //'author' => $metaTags['og:site_name'] ?? '', + 'description' => $metaTags['og:description'] ?: $metaTags['description'] ?: '', + 'images' => [], + ]; +} + +function makeParamsRelativeUrl($platform, $url) { + parse_str(parse_url($url, PHP_URL_QUERY), $params); + $url = parse_url($url, PHP_URL_PATH) . '?'; + foreach ($params as $key => $value) { + if (in_array($key, PLATFORMS_PARAMS[$platform])) { + $url .= "{$key}={$value}&"; + } + } + return rtrim($url, '?&'); +} + +function readBoolParam($key, $default=null, $array=null) { + if (!$array) { + $array = $_GET; + } + $value = $array[$key]; + if (isset($value) && $value !== '') { + return filter_var($value, FILTER_VALIDATE_BOOLEAN); + } else { + return $default; + } +} + +function readProxatoreParam($key, $array=null) { + return readBoolParam("proxatore-{$key}", OPTIONS_DEFAULTS[$key], $array); + // TODO handle domain HTTP referer overrides +} + +function getPageData($platform, $relativeUrl) { + if ($platform && $relativeUrl && ($data = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { + // if (!in_array($platform, PLATFORMS_TRACKING)) { + // $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); + // } + if (isset(PLATFORMS_PARAMS[$platform])) { + if (PLATFORMS_PARAMS[$platform] !== true) { + $relativeUrl = makeParamsRelativeUrl($platform, $relativeUrl); + } + } else { + $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); + } + $data['doc'] = htmldom($data['body']); + $data['result'] = makeResultObject($platform, $relativeUrl, parseMetaTags($data['doc'])); + return $data; + } +} + +function handleApiRequest($segments) { + $api = substr($segments[0], 2, -2); + $platform = $segments[1]; + $relativeUrl = implode('/', array_slice($segments, 2)); + if (($api === 'proxy' || $api === 'media')) { + if ($platform === 'youtube' && ($video = preg_replace("/[^A-Za-z0-9-_]/", '', escapeshellarg(substr($relativeUrl, -11))))) { + header('Location: ' . shell_exec("yt-dlp -g '{$video}'")); + } else if ($api === 'media' && end($segments) === '0') { + $relativeUrl = substr($relativeUrl, 0, -2); + $data = getPageData($platform, $relativeUrl)['result']; + if ($url = ($data['video'] ?: $data['image'])) { + header('Location: ' . $url); + } + } + } else if ($api === 'embed') { + header('Location: ' . makeEmbedUrl($platform, $relativeUrl)); + } + die(); +} + +function iframeHtml($result) { ?> + +
Supported Platforms:
Source Code: Proxatore.php
'; + foreach (PLATFORMS_USERSITES as $platform) { + $platforms .= "proxatore-search=
{search term} — Make a full-text search or load a given URLproxatore-history=
{true,false} — Specify if a given query must be stored in the global search history (default: true)Made with 🕸️ and 🧨 by OctoSpacc. + Source Code: Proxatore.php. +
'; } ?> + + +Nothing was found.
+ += htmlspecialchars($item['title']) ?> - = htmlspecialchars($item['platform']) ?> + = htmlspecialchars($item['platform']) ?>
= htmlspecialchars($item['title']) ?> - = htmlspecialchars($item['platform']) ?> + = htmlspecialchars($item['platform']) ?>
= /*htmlspecialchars*/($item['description']) ?>
@@ -585,15 +803,7 @@ video:not(video[src=""]) + img {