From bb3577b7936e1c0d6220da3b5058f398b1347bd4 Mon Sep 17 00:00:00 2001 From: octt <6083316-octospacc@users.noreply.gitlab.com> Date: Fri, 23 May 2025 12:48:47 +0200 Subject: [PATCH] v0.7, 15/01 --- Proxatore.php | 485 ++++++++++++++++++++++++++------------------------ 1 file changed, 248 insertions(+), 237 deletions(-) diff --git a/Proxatore.php b/Proxatore.php index 3037800..1cc9bc9 100644 --- a/Proxatore.php +++ b/Proxatore.php @@ -1,5 +1,5 @@ ['facebook.com', 'm.facebook.com'], @@ -34,7 +34,7 @@ const PLATFORMS_ORDERED = ['telegram']; const PLATFORMS_TRACKING = ['facebook', 'xiaohongshu']; -const PLATFORMS_VIDEO = ['instagram']; +const PLATFORMS_VIDEO = ['facebook', 'instagram']; const EMBEDS = [ 'reddit' => ['embed.reddit.com'], @@ -64,30 +64,35 @@ function urlLast($url) { return end(explode('/', trim(parse_url($url, PHP_URL_PATH), '/'))); } +function redirectTo($internalUrl) { + header('Location: ' . $_SERVER['SCRIPT_NAME'] . '/' . $internalUrl); + die(); +} + function fetchContent($url, $redirects=-1) { - $ch = curl_init(); - //$useragent = 'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0'; - $useragent = 'curl/' . curl_version()['version']; - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_MAXREDIRS, $redirects); - curl_setopt($ch, CURLOPT_USERAGENT, $useragent); - $body = curl_exec($ch); - http_response_code($code = curl_getinfo($ch, CURLINFO_HTTP_CODE)); - curl_close($ch); - return [ - 'body' => $body, - 'code' => $code, - 'url' => curl_getinfo($ch, CURLINFO_REDIRECT_URL), - ]; + $ch = curl_init(); + //$useragent = 'Mozilla/5.0 (X11; Linux x86_64; rv:129.0) Gecko/20100101 Firefox/129.0'; + $useragent = 'curl/' . curl_version()['version']; + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_MAXREDIRS, $redirects); + curl_setopt($ch, CURLOPT_USERAGENT, $useragent); + $body = curl_exec($ch); + http_response_code($code = curl_getinfo($ch, CURLINFO_HTTP_CODE)); + curl_close($ch); + return [ + 'body' => $body, + 'code' => $code, + 'url' => curl_getinfo($ch, CURLINFO_REDIRECT_URL), + ]; } function makeCanonicalUrl($item) { if (!$item) { return NULL; } - return 'https://' . (PLATFORMS[$item['platform']][0] ?? '') . '/' . $item['relativeurl']; + return 'https://' . (PLATFORMS[$item['platform']][0] ?: $item['platform']) . '/' . $item['relativeurl']; } function makeEmbedUrl($platform, $relativeUrl) { @@ -96,7 +101,7 @@ function makeEmbedUrl($platform, $relativeUrl) { } else if (isset(EMBEDS_PREFIXES_FULL[$platform])) { return 'https://' . EMBEDS_PREFIXES_FULL[$platform] . $relativeUrl; } else { - return 'https://' . (EMBEDS[$platform][0] ?: PLATFORMS[$platform][0] ?: PLATFORMS_PROXIES[$platform][0] ?: '') . '/' . trim($relativeUrl, '/') . (EMBEDS_SUFFIXES[$platform] ?? ''); + return 'https://' . (EMBEDS[$platform][0] ?: PLATFORMS[$platform][0] ?: PLATFORMS_PROXIES[$platform][0] ?: $platform) . '/' . trim($relativeUrl, '/') . (EMBEDS_SUFFIXES[$platform] ?? ''); } // switch ($platform) { // case 'tiktok': @@ -113,45 +118,45 @@ function makeScrapeUrl($platform, $relativeUrl) { } function parseMetaTags($doc) { - $metaTags = []; - foreach ($doc->getElementsByTagName('meta') as $meta) { - if ($meta->hasAttribute('name') || $meta->hasAttribute('property')) { - $metaTags[$meta->getAttribute('name') ?: $meta->getAttribute('property')] = $meta->getAttribute('content'); - } - } - return $metaTags; + $metaTags = []; + foreach ($doc->getElementsByTagName('meta') as $meta) { + if ($meta->hasAttribute('name') || $meta->hasAttribute('property')) { + $metaTags[$meta->getAttribute('name') ?: $meta->getAttribute('property')] = $meta->getAttribute('content'); + } + } + return $metaTags; } function loadHistory() { - $history = []; - if (file_exists(HISTORY_FILE)) { - $lines = file(HISTORY_FILE, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); - foreach ($lines as $line) { - $history[] = json_decode($line, true); - } - } - return $history; + $history = []; + if (file_exists(HISTORY_FILE)) { + $lines = file(HISTORY_FILE, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + foreach ($lines as $line) { + $history[] = json_decode($line, true); + } + } + return $history; } function saveHistory($entry) { - $history = loadHistory(); - $history = array_filter($history, function ($item) use ($entry) { - return $item['platform'] !== $entry['platform'] || $item['relativeurl'] !== $entry['relativeurl']; - }); - $history[] = $entry; - $lines = array_map(fn($item) => json_encode($item, JSON_UNESCAPED_SLASHES), $history); - file_put_contents(HISTORY_FILE, implode(PHP_EOL, $lines) . PHP_EOL, LOCK_EX); + $history = loadHistory(); + $history = array_filter($history, function ($item) use ($entry) { + return $item['platform'] !== $entry['platform'] || $item['relativeurl'] !== $entry['relativeurl']; + }); + $history[] = $entry; + $lines = array_map(fn($item) => json_encode($item, JSON_UNESCAPED_SLASHES), $history); + file_put_contents(HISTORY_FILE, implode(PHP_EOL, $lines) . PHP_EOL, LOCK_EX); } function searchHistory($keyword) { - $results = []; - $history = loadHistory(); - foreach ($history as $entry) { - if (stripos(json_encode($entry, JSON_UNESCAPED_SLASHES), $keyword) !== false) { - $results[] = $entry; - } - } - return $results; + $results = []; + $history = loadHistory(); + foreach ($history as $entry) { + if (stripos(json_encode($entry, JSON_UNESCAPED_SLASHES), $keyword) !== false) { + $results[] = $entry; + } + } + return $results; } $path = $_SERVER['REQUEST_URI'];//parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH); @@ -159,8 +164,7 @@ $immediateResult = null; if (isset($_GET['search']) && ($search = $_GET['search']) !== '') { if (str_starts_with(strtolower($search), 'https://')) { - header('Location: ' . $_SERVER['SCRIPT_NAME'] . '/' . lstrip($search, 'https://')); - die(); + redirectTo(lstrip($search, 'https://')); } $searchResults = searchHistory($search); } else { @@ -173,38 +177,30 @@ if (isset($_GET['search']) && ($search = $_GET['search']) !== '') { if (isset(PLATFORMS[$upstream])) { if (isset(PLATFORMS_ALIASES[$upstream])) { - header('Location: ' . $_SERVER['SCRIPT_NAME'] . '/' . PLATFORMS_ALIASES[$upstream] . '/' . $relativeUrl); - die(); + redirectTo(PLATFORMS_ALIASES[$upstream] . '/' . $relativeUrl); } $platform = $upstream; $domain = PLATFORMS[$upstream][0]; - //$upstreamUrl = "https://$domain"; } else { foreach ([PLATFORMS_PROXIES, PLATFORMS, EMBEDS] as $array) { foreach ($array as $platform => $domains) { if (in_array($upstream, $domains) || in_array(lstrip($upstream, 'www.'), $domains)) { - header('Location: ' . $_SERVER['SCRIPT_NAME'] . '/' . $platform . '/' . $relativeUrl); - die(); - //$upstreamUrl = "https://$upstream"; - //break; + redirectTo($platform . '/' . $relativeUrl); } } unset($platform); } } - //if (!$platform && $upstream === 'vm.tiktok.com') { - // $platform = $upstream;//'tiktok'; - // //'https://vm.tiktok.com/ZNeKpMrUB/'; - //} if (!$platform && isset(PLATFORMS_REDIRECTS[$upstream])) { $relativeUrl = trim(parse_url(fetchContent("$upstream/$relativeUrl", 1)['url'], PHP_URL_PATH), '/'); $platform = PLATFORMS_REDIRECTS[$upstream]; - header('Location: ' . $_SERVER['SCRIPT_NAME'] . '/' . $platform . '/' . $relativeUrl); - die(); + redirectTo($platform . '/' . $relativeUrl); + } else if (!$platform && (str_ends_with($upstream, '.wordpress.com') || str_ends_with($upstream, '.blogspot.com'))) { + $platform = $upstream; } - if ($relativeUrl && /*$upstreamUrl*/ $platform && ($content = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { + if ($relativeUrl && $platform && ($content = fetchContent(makeScrapeUrl($platform, $relativeUrl)))['body']) { if (!in_array($platform, PLATFORMS_TRACKING)) { $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH); } @@ -216,9 +212,11 @@ if (isset($_GET['search']) && ($search = $_GET['search']) !== '') { 'relativeurl' => $relativeUrl, //'datetime' => date('Y-m-d H:i:s'), //'request_time' => time(), + 'locale' => $metaTags['og:locale'] ?? '', 'type' => $metaTags['og:type'] ?? '', 'image' => $metaTags['og:image'] ?? '', 'video' => $metaTags['og:video'] ?? '', + 'videotype' => $metaTags['og:video:type'] ?? '', 'title' => $metaTags['og:title'] ?: $metaTags['og:title'] ?: '', //'author' => $metaTags['og:site_name'] ?? '', 'description' => $metaTags['og:description'] ?: $metaTags['description'] ?: '', @@ -233,12 +231,14 @@ if (isset($_GET['search']) && ($search = $_GET['search']) !== '') { //echo $vidstr; $startpos = $endpos - strpos(strrev($vidstr), '"'); $vidstr = substr($html, $startpos, $endpos-$startpos+1); - //echo $vidstr; //echo '|' . $vidpos . '|' . $startpos . '|' . $endpos; //substr($html, $startpos, $endpos); - $vidstr = json_decode('"' . json_decode('"' . html_entity_decode($vidstr) . '"')); + $vidstr = html_entity_decode($vidstr); + //$vidstr = json_decode('"' . json_decode('"' . ($vidstr) . '"') . ''); + $vidstr = json_decode('"' . json_decode('"' . $vidstr . '"')) ?: json_decode('"' . json_decode('"' . $vidstr) . '"'); + //$vidstr = json_decode('"' . $vidstr . '"'); //echo $vidstr; $immediateResult['video'] = $vidstr; - //echo '"' . $vidstr . '"'; + //echo '|'.$startpos.'|'.$endpos.'|'; } if (!$immediateResult['image']) { $doc->loadHTML($html); @@ -254,7 +254,10 @@ if (isset($_GET['search']) && ($search = $_GET['search']) !== '') { // saveHistory($immediateResult); //} else if ($content['code'] >= 400) { - $searchResults = searchHistory($relativeUrl); + $searchResults = searchHistory(json_encode([ + 'platform' => $platform, + 'relativeurl' => $relativeUrl, + ], JSON_UNESCAPED_SLASHES));//('"platform":"' . $platform . '","relativeurl":"' . $relativeUrl . '"'); $immediateResult = $searchResults[0]; } else { saveHistory($immediateResult); @@ -275,188 +278,196 @@ if (isset($_GET['search']) && ($search = $_GET['search']) !== '') { + - - + +
@@ -498,7 +509,7 @@ if (isset($_GET['search']) && ($search = $_GET['search']) !== '') {= /*htmlspecialchars*/($item['description']) ?>
- Original on = htmlspecialchars(PLATFORMS[$item['platform']][0] ?? '') ?>
+ Original on = htmlspecialchars(PLATFORMS[$item['platform']][0] ?: $item['platform']) ?>/= htmlspecialchars($item['relativeurl']) ?>
= APPNAME ?> Permalink