mirror of
https://gitlab.com/octospacc/Proxatore.git
synced 2025-06-05 17:19:18 +02:00
463 lines
17 KiB
PHP
463 lines
17 KiB
PHP
<?php
|
|
/*
|
|
* Proxatore, a proxy for viewing and embedding content from various platforms.
|
|
* Copyright (C) 2025 OctoSpacc
|
|
*
|
|
*/
|
|
|
|
require 'history.php';
|
|
|
|
/**
 * Reduces a dotted platform identifier to its last two dot-separated labels
 * (e.g. "a.b.example.com" becomes "example.com").
 * Identifiers without a dot are returned unchanged.
 */
function normalizePlatform(string $platform): string {
    return (str_contains($platform, '.')
        ? lstrip($platform, '.', -2)
        : $platform);
}
|
|
|
|
/**
 * Removes a leading "www." label from a domain.
 *
 * @return string|null The domain without its first label, or null when the
 *                     domain does not start with "www.".
 */
function stripWww(string $domain): string|null {
    if (!str_starts_with($domain, 'www.')) {
        return null;
    }
    return lstrip($domain, '.', 1);
}
|
|
|
|
/**
 * Tells whether the given value is a key of the PLATFORMS map
 * (with a non-null entry).
 */
function isExactPlatformName($platform): bool {
    $entry = PLATFORMS[$platform] ?? null;
    return $entry !== null;
}
|
|
|
|
/**
 * Resolves a platform alias or shorthand (case-insensitive) to a platform name.
 * PLATFORMS_ALIASES is consulted first, then PLATFORMS_SHORTHANDS.
 *
 * @return string|null The mapped platform, or null when the alias is unknown.
 */
function platformFromAlias(string $alias): string|null {
    $key = strtolower($alias);
    if (isset(PLATFORMS_ALIASES[$key])) {
        return PLATFORMS_ALIASES[$key];
    }
    return PLATFORMS_SHORTHANDS[$key] ?? null;
}
|
|
|
|
/**
 * Finds the platform that a domain belongs to.
 *
 * The (misspelled) function name is kept as-is because other code calls it
 * under this name.
 *
 * @param string $upstream Domain name, matched case-insensitively.
 * @return string|null The platform key, or null when the domain is unsupported.
 */
function platfromFromDomain(string $upstream): string|null {
    $upstream = strtolower($upstream);
    // null when the domain has no "www." prefix; computed once for all lookups
    $withoutWww = stripWww($upstream);
    // check supported domains from most to least likely
    foreach ([PLATFORMS, PLATFORMS_PROXIES, EMBEDS_DOMAINS] as $array) {
        foreach ($array as $platform => $domains) {
            if (in_array($upstream, $domains, true)
                || ($withoutWww !== null && in_array($withoutWww, $domains, true))) {
                return $platform;
            }
        }
    }
    // check for a known fake subdomain (eg. region-code.example.com)
    // currently doesn't handle formats like www.region-code.example.com
    $parentDomain = lstrip($upstream, '.', 1);
    foreach (PLATFORMS_FAKESUBDOMAINS as $domain) {
        if ($parentDomain === $domain) {
            // the original called the non-existent platformFromDomain() here
            return platfromFromDomain($domain);
        }
    }
    return null; // domain unsupported
}
|
|
|
|
/**
 * Resolves an upstream identifier to a platform name.
 * Tries, in order: exact platform name (lowercased), alias/shorthand, domain.
 *
 * @return string|null The platform, or null when nothing matches.
 */
function platformFromUpstream(string $upstream): string|null {
    $lowered = strtolower($upstream);
    if (isExactPlatformName($lowered)) {
        return $lowered;
    }
    return platformFromAlias($upstream) ?? platfromFromDomain($upstream);
}
|
|
|
|
/**
 * Checks whether a platform, after normalizePlatform(), is a value of $array.
 */
function inPlatformArray(string $platform, array $array): bool {
    $needle = normalizePlatform($platform);
    return in_array($needle, $array);
}
|
|
|
|
/**
 * Looks up a platform, after normalizePlatform(), as a key of $array.
 *
 * @return mixed The stored value, or null when the key is absent or null.
 */
function platformMapGet(string $platform, array $array): mixed {
    $key = normalizePlatform($platform);
    return (isset($array[$key]) ? $array[$key] : null);
}
|
|
|
|
/**
 * Splits $str on $sub, drops pieces according to array_slice() offset $num
 * and joins the remainder back with $sub.
 *
 * A positive $num removes that many leading pieces; a negative $num keeps
 * only the last |$num| pieces.
 */
function lstrip(string $str, string $sub, int $num): string {
    $pieces = explode($sub, $str);
    $kept = array_slice($pieces, $num);
    return implode($sub, $kept);
}
|
|
|
|
/**
 * Returns the last path segment of a URL (query string and surrounding
 * slashes are ignored).
 *
 * @return string The final segment, or an empty string when the URL has no path.
 */
function urlLast(string $url): string {
    // parse_url() yields null (no path) or false (malformed URL); cast so that
    // trim() never receives null, which is deprecated since PHP 8.1
    $path = (string) parse_url($url, PHP_URL_PATH);
    $segments = explode('/', trim($path, '/'));
    return end($segments);
}
|
|
|
|
/**
 * Tells whether a string begins with "http://" or "https://"
 * (compared case-insensitively).
 */
function isAbsoluteUrl(string $str): bool {
    $lowered = strtolower($str);
    foreach (['http://', 'https://'] as $scheme) {
        if (str_starts_with($lowered, $scheme)) {
            return true;
        }
    }
    return false;
}
|
|
|
|
/**
 * Strips the scheme from an absolute http(s) URL.
 *
 * @return string|null Everything after the first "://", or null when the
 *                     input is not an absolute http(s) URL.
 */
function parseAbsoluteUrl(string $str): string|null {
    if (!isAbsoluteUrl($str)) {
        return null;
    }
    return lstrip($str, '://', 1);
}
|
|
|
|
/**
 * Builds an absolute URL pointing at this script, followed by $str.
 * Composed of the request protocol, $_SERVER['SERVER_NAME'] and SCRIPT_NAME.
 */
function makeSelfUrl(string $str=''): string {
    $origin = getRequestProtocol() . '://' . $_SERVER['SERVER_NAME'];
    return $origin . SCRIPT_NAME . $str;
}
|
|
|
|
/**
 * Sends a Location redirect and terminates the script.
 *
 * Absolute http(s) URLs are emitted as given. Any other URL is treated as
 * relative to SCRIPT_NAME; when the "history" option is off, the
 * "proxatore-history=false" parameter is appended unless the URL already
 * carries a "proxatore-history" parameter.
 */
function redirectTo(string $url): void {
    $absolute = parseAbsoluteUrl($url);
    if (!$absolute && !readProxatoreBool('history')) {
        // parse_url() returns null when there is no query string; cast so that
        // parse_str() never receives null (deprecated since PHP 8.1)
        parse_str((string) parse_url($url, PHP_URL_QUERY), $params);
        if (!isset($params['proxatore-history'])) {
            $url .= (str_contains($url, '?') ? '&' : '?') . 'proxatore-history=false';
        }
    }
    header('Location: ' . ($absolute ? '' : SCRIPT_NAME) . $url);
    die();
}
|
|
|
|
/**
 * Determines the scheme of the current request.
 * Uses $_SERVER['REQUEST_SCHEME'] when set; otherwise "https" when
 * $_SERVER['HTTPS'] is exactly "on", and "http" in every other case.
 */
function getRequestProtocol(): string {
    if (isset($_SERVER['REQUEST_SCHEME'])) {
        return $_SERVER['REQUEST_SCHEME'];
    }
    $httpsFlag = $_SERVER['HTTPS'] ?? null;
    if ($httpsFlag === 'on') {
        return 'https';
    }
    return 'http';
}
|
|
|
|
/**
 * Performs an HTTP GET with cURL, following redirects.
 *
 * @param string $url       Address to fetch.
 * @param int    $redirects Value handed to CURLOPT_MAXREDIRS.
 * @return array 'body' (curl_exec() result), 'code' (HTTP status) and 'url'
 *               (CURLINFO_REDIRECT_URL when non-empty, else the effective URL).
 */
function fetchContent(string $url, int $redirects=-1): array {
    $handle = curl_init();
    $options = [
        CURLOPT_URL => $url,
        CURLOPT_SSL_VERIFYPEER => !ALLOW_NONSECURE_SSL,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => $redirects,
        // format the UA like curl CLI otherwise some sites can't behave
        CURLOPT_USERAGENT => 'curl/' . curl_version()['version'],
    ];
    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }
    $body = curl_exec($handle);
    $code = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    $finalUrl = curl_getinfo($handle, CURLINFO_REDIRECT_URL) ?: curl_getinfo($handle, CURLINFO_EFFECTIVE_URL);
    curl_close($handle);
    return [
        'body' => $body,
        'code' => $code,
        'url' => $finalUrl,
    ];
}
|
|
|
|
/**
 * Joins a platform and a relative URL into "platform/relativeUrl".
 */
function makeInternalBareUrl(string $platform, string $relativeUrl): string {
    return $platform . '/' . $relativeUrl;
}
|
|
|
|
/**
 * Builds the internal "platform/relativeurl" URL of an item.
 *
 * @param array $item Either a result array holding 'platform' and 'relativeurl',
 *                    or a wrapper whose 'result' entry is such an array.
 */
function makeInternalItemUrl(array $item): string {
    // tolerate items without a 'result' key instead of raising an
    // undefined-key warning
    $source = ($item['result'] ?? null) ?: $item;
    return makeInternalBareUrl($source['platform'], $source['relativeurl']);
}
|
|
|
|
/**
 * Builds the canonical https URL of a page on its original platform.
 *
 * The host is the first domain listed for the platform in PLATFORMS; when the
 * platform has no entry there, the platform string itself is used as the host.
 */
function makeCanonicalBareUrl(string $platform, string $relativeUrl): string {
    // guarded lookup: unknown platforms take the fallback without a warning
    $host = (PLATFORMS[$platform][0] ?? null) ?: $platform;
    return 'https://' . $host . '/' . $relativeUrl;
}
|
|
|
|
/**
 * Builds the canonical URL for an item holding 'platform' and 'relativeurl'.
 *
 * @return string|null The canonical URL, or null when $item is null or empty.
 */
function makeCanonicalItemUrl(array|null $item): string|null {
    if (!$item) {
        return null;
    }
    return makeCanonicalBareUrl($item['platform'], $item['relativeurl']);
}
|
|
|
|
/**
 * Builds the embed URL for a page, using the first matching strategy:
 *  1. EMBEDS_PREFIXES_SIMPLE: prefix + last path segment;
 *  2. EMBEDS_PREFIXES_PARAMS: template whose "[key]" placeholders are filled
 *     from the query parameters listed in PLATFORMS_PARAMS;
 *  3. EMBEDS_PREFIXES_FULL: prefix + url-encoded relative URL;
 *  4. EMBEDS_API: value taken verbatim from $meta (returned without the
 *     "https://" prefix being added);
 *  5. otherwise: first known domain (or the platform itself) + relative URL
 *     + optional EMBEDS_SUFFIXES entry.
 *
 * @param array|null $meta Page meta tags; only read for EMBEDS_API platforms.
 */
function makeEmbedUrl(string $platform, string $relativeUrl, array|null $meta=null): string {
    if (isset(EMBEDS_PREFIXES_SIMPLE[$platform])) {
        $url = EMBEDS_PREFIXES_SIMPLE[$platform] . urlLast($relativeUrl);
    } else if (isset(EMBEDS_PREFIXES_PARAMS[$platform])) {
        $url = EMBEDS_PREFIXES_PARAMS[$platform];
        // the query string is parsed once, not on every iteration; the cast
        // avoids handing null to parse_str() when there is no query
        parse_str((string) parse_url($relativeUrl, PHP_URL_QUERY), $params);
        foreach (PLATFORMS_PARAMS[$platform] ?? [] as $key) {
            // a parameter missing from the URL empties its placeholder
            $url = str_replace("[$key]", $params[$key] ?? '', $url);
        }
    } else if (isset(EMBEDS_PREFIXES_FULL[$platform])) {
        $url = EMBEDS_PREFIXES_FULL[$platform] . urlencode($relativeUrl);
    } else if ($api = (EMBEDS_API[$platform] ?? null)) {
        // $meta may be null or lack the key (e.g. when called without meta):
        // return an empty string rather than violating the string return type
        return (string) ($meta[$api['meta']] ?? '');
    } else {
        $host = EMBEDS_DOMAINS[$platform][0] ?? PLATFORMS[$platform][0] ?? PLATFORMS_PROXIES[$platform][0] ?? $platform;
        $url = $host . '/' . trim($relativeUrl, '/') . (EMBEDS_SUFFIXES[$platform] ?? '');
    }
    return "https://{$url}";
}
|
|
|
|
/**
 * Builds the https URL from which a page's data is scraped.
 *
 * Platforms listed in PLATFORMS_USEPROXY use their first PLATFORMS_PROXIES
 * domain when one exists; otherwise the first PLATFORMS domain is used, and
 * finally the platform string itself serves as the host.
 */
function makeDataScrapeUrl(string $platform, string $relativeUrl): string {
    // guarded lookups: platforms absent from the maps take the fallbacks
    // without raising undefined-key warnings
    $canonicalHost = PLATFORMS[$platform][0] ?? null;
    $host = $canonicalHost;
    if (inPlatformArray($platform, PLATFORMS_USEPROXY)) {
        $host = (PLATFORMS_PROXIES[$platform][0] ?? null) ?: $canonicalHost;
    }
    return 'https://' . ($host ?: $platform) . '/' . $relativeUrl;
}
|
|
|
|
/**
 * Returns the URL from which an item's media is scraped, which is the item's
 * embed URL.
 *
 * @param array $item Must hold 'result' (with 'platform' and 'relativeurl')
 *                    and 'meta'.
 */
function makeMediaScrapeUrl(array $item): string {
    // TODO: if the embed URL is not absolute the page has already been scraped
    // and should not be fetched again for nothing...
    $result = $item['result'];
    return makeEmbedUrl($result['platform'], $result['relativeurl'], $item['meta']);
}
|
|
|
|
/**
 * Collects the value of one attribute from every element with a given tag.
 *
 * @param DOMDocument|string $doc  Parsed document, or HTML source to parse.
 * @param string             $tag  Tag name to look for.
 * @param string             $attr Attribute to read.
 * @return array One entry per element, in document order; elements lacking
 *               the attribute contribute an empty string.
 */
function getHtmlAttributes(DOMDocument|string $doc, string $tag, string $attr): array {
    $dom = (is_string($doc) ? htmldom($doc) : $doc);
    $elements = iterator_to_array($dom->getElementsByTagName($tag), false);
    return array_map(fn ($element) => $element->getAttribute($attr), $elements);
}
|
|
|
|
/**
 * Extracts the <meta> tags of a document into a map.
 *
 * Only tags carrying a "name" or "property" attribute are kept; the key is
 * the "name" value when non-empty, else the "property" value, and the value
 * is the "content" attribute. Later tags overwrite earlier ones sharing a key.
 */
function parseMetaTags(DOMDocument $doc): array {
    $result = [];
    foreach ($doc->getElementsByTagName('meta') as $node) {
        if (!$node->hasAttribute('name') && !$node->hasAttribute('property')) {
            continue;
        }
        $key = $node->getAttribute('name') ?: $node->getAttribute('property');
        $result[$key] = $node->getAttribute('content');
    }
    return $result;
}
|
|
|
|
/**
 * Parses an HTML string (treated as UTF-8) into a DOMDocument.
 *
 * libxml errors are switched to internal handling and cleared, so malformed
 * markup does not emit warnings.
 *
 * @return DOMDocument The parsed document; an empty document for empty input.
 */
function htmldom(string $body): DOMDocument {
    libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    // DOMDocument::loadHTML() throws a ValueError on an empty string
    if ($body !== '') {
        // Encode every non-ASCII character as a numeric entity so the parser
        // reads the text correctly whatever charset it assumes. This replaces
        // mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated since PHP 8.2.
        $doc->loadHTML(mb_encode_numericentity($body, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));
    }
    libxml_clear_errors();
    return $doc;
}
|
|
|
|
/**
 * Finds the first double-quoted string in $txt that contains ".mp4" and
 * returns it as a URL.
 *
 * Occurrences of ".mp4?" are preferred over a bare ".mp4". The extracted text
 * is HTML-entity-decoded and then JSON-unescaped twice (e.g. "\/" -> "/").
 *
 * @return string|null The decoded string, or null when no ".mp4" is found,
 *                     no closing quote follows it, or decoding fails.
 */
function getAnyVideoUrl(string $txt): string|null {
    // strpos() reports "not found" as false (not null), so the fallback has
    // to test for false explicitly; the original `??` never fell back
    $vidpos = strpos($txt, '.mp4?');
    if ($vidpos === false) {
        $vidpos = strpos($txt, '.mp4');
    }
    if (!$vidpos) {
        // not found, or at offset 0 where no opening quote can precede it
        return null;
    }
    $endpos = strpos($txt, '"', $vidpos);
    if ($endpos === false) {
        return null; // no closing quote after the match
    }
    // the string starts right after the last quote preceding the closing one
    $openQuote = strrpos(substr($txt, 0, $endpos), '"');
    $startpos = ($openQuote === false ? $endpos : $openQuote + 1);
    // deliberately includes the closing quote, see the decoding step below
    $vidstr = html_entity_decode(substr($txt, $startpos, $endpos - $startpos + 1));
    // $vidstr still ends with its closing quote, so normally the second
    // expression is the one that yields the doubly-unescaped value
    return json_decode('"' . json_decode('"' . $vidstr . '"')) ?: json_decode('"' . json_decode('"' . $vidstr) . '"');
}
|
|
|
|
/**
 * Builds the result array describing a page from its meta tags.
 *
 * @param string $platform    Platform the page belongs to.
 * @param string $relativeUrl Page URL relative to the platform.
 * @param array  $meta        Map of meta tag name/property => content.
 * @return array Keys: platform, relativeurl, locale, type, image, video,
 *               videotype, htmlvideo, audio, title, description, images.
 */
function makeResultObject(string $platform, string $relativeUrl, array $meta): array {
    $video = $meta['og:video'] ?? $meta['og:video:url'] ?? '';
    $data = [
        'platform' => $platform,
        'relativeurl' => $relativeUrl,
        'locale' => $meta['og:locale'] ?? '',
        'type' => $meta['og:type'] ?? '',
        'image' => $meta['og:image'] ?? '',
        'video' => $video,
        'videotype' => $meta['og:video:type'] ?? '',
        'htmlvideo' => $video,
        'audio' => $meta['og:audio'] ?? '',
        // NOTE(review): the original fell back from 'og:title' to 'og:title'
        // again; a different fallback key was probably intended, confirm which
        'title' => $meta['og:title'] ?? '',
        'description' => $meta['og:description'] ?? $meta['description'] ?? '',
        'images' => [],
    ];
    $isWebVideo = inPlatformArray($platform, PLATFORMS_WEBVIDEO);
    if ($isWebVideo && !$data['video']) {
        // no video advertised: point at the page itself
        $data['video'] = makeCanonicalItemUrl($data);
        $data['videotype'] = 'text/html';
    }
    if ($data['video'] && $data['videotype'] === 'text/html') {
        // guarded read: the query parameter is optional
        $mediaProxyParam = getQueryArray()['proxatore-mediaproxy'] ?? null;
        $proxy = (($isWebVideo || readProxatoreBool('mediaproxy') || $mediaProxyParam === 'video') ? 'file' : '');
        $data['htmlvideo'] = SCRIPT_NAME . "__{$proxy}proxy__/{$platform}/{$data['video']}";
        if (readProxatoreBool('htmlmedia')) {
            $data['video'] = $data['htmlvideo'];
            $data['videotype'] = 'video/mp4';
        }
    }
    return $data;
}
|
|
|
|
/**
 * Rebuilds a relative URL keeping only the query parameters that are listed
 * for the platform in PLATFORMS_PARAMS.
 *
 * @return string The path, followed by "?key=value&..." for the kept
 *                parameters (no trailing "?" or "&").
 */
function makeParamsRelativeUrl(string $platform, string $url): string {
    // parse_url() returns null for a missing component; cast so parse_str()
    // never receives null (deprecated since PHP 8.1)
    parse_str((string) parse_url($url, PHP_URL_QUERY), $params);
    $allowed = PLATFORMS_PARAMS[$platform] ?? [];
    $url = (string) parse_url($url, PHP_URL_PATH) . '?';
    foreach ($params as $key => $value) {
        if (in_array((string) $key, $allowed, true)) {
            // NOTE(review): values were url-decoded by parse_str() and are
            // written back raw, as before; confirm whether re-encoding is wanted
            $url .= "{$key}={$value}&";
        }
    }
    return rtrim($url, '?&');
}
|
|
|
|
/**
 * Returns the request parameters, currently always the $_GET array.
 * (Selecting $_POST for POST requests was considered but is not enabled.)
 */
function getQueryArray(): array {
    $query = $_GET;
    return $query;
}
|
|
|
|
/**
 * Reads a boolean parameter.
 *
 * @param string     $key     Parameter name.
 * @param bool|null  $default Returned when the parameter is absent or empty.
 * @param array|null $array   Source map; the request query is used when this
 *                            is null or empty.
 * @return bool|null The value as interpreted by FILTER_VALIDATE_BOOLEAN
 *                   ("1"/"true"/"on"/"yes" are true, anything else is false),
 *                   or $default.
 */
function readBoolParam(string $key, bool|null $default=null, array|null $array=null): bool|null {
    if (!$array) {
        $array = getQueryArray();
    }
    $value = $array[$key] ?? null;
    // Only a missing or empty parameter selects the default. The original
    // truthiness test also treated an explicit "0" as missing, so "=0" could
    // never switch off an option whose default is true.
    if ($value === null || $value === '') {
        return $default;
    }
    return filter_var($value, FILTER_VALIDATE_BOOLEAN);
}
|
|
|
|
/**
 * Reads the boolean option "proxatore-{$key}", falling back to the entry for
 * $key in OPTIONS_DEFAULTS (or null when there is none).
 *
 * @param array|null $array Source map; the request query is used when null.
 */
function readProxatoreBool(string $key, array|null $array=null): bool|null {
    // TODO handle domain HTTP referer overrides
    // guarded default lookup, consistent with readProxatoreParam()
    return readBoolParam("proxatore-{$key}", OPTIONS_DEFAULTS[$key] ?? null, $array);
}
|
|
|
|
/**
 * Reads the option "proxatore-{$key}" as a string.
 *
 * @param array|null $array Source map; the request query is used when this is
 *                          null or empty.
 * @return string|null The parameter value, else the OPTIONS_DEFAULTS entry
 *                     for $key, else null.
 */
function readProxatoreParam(string $key, array|null $array=null): string|null {
    if (!$array) {
        $array = getQueryArray();
    }
    return ($array["proxatore-{$key}"] ?? OPTIONS_DEFAULTS[$key] ?? null);
}
|
|
|
|
/**
 * Fetches a page and derives its result object.
 *
 * The stored relative URL keeps its query string only for platforms present
 * in PLATFORMS_PARAMS: entirely when the entry is `true`, otherwise reduced
 * to the listed parameters. All other platforms keep the path only.
 *
 * @return array|null The fetch data ('body', 'code', 'url') extended with
 *                    'doc', 'meta' and 'result'; null when an argument is
 *                    empty or the fetched body is empty.
 */
function getPageData($platform, $relativeUrl): array|null {
    if (!$platform || !$relativeUrl) {
        return null;
    }
    $page = fetchContent(makeDataScrapeUrl($platform, $relativeUrl));
    if (!$page['body']) {
        return null;
    }
    if (!isset(PLATFORMS_PARAMS[$platform])) {
        $relativeUrl = parse_url($relativeUrl, PHP_URL_PATH);
    } else if (PLATFORMS_PARAMS[$platform] !== true) {
        $relativeUrl = makeParamsRelativeUrl($platform, $relativeUrl);
    }
    $page['doc'] = htmldom($page['body']);
    $page['meta'] = parseMetaTags($page['doc']);
    $page['result'] = makeResultObject($platform, $relativeUrl, $page['meta']);
    return $page;
}
|
|
|
|
/**
 * Resolves a link on a redirecting upstream to an internal URL on the
 * platform it maps to in PLATFORMS_REDIRECTS.
 *
 * The upstream link is fetched with at most one redirect followed; the first
 * three "/"-separated pieces of the resulting URL are dropped and the rest
 * becomes the relative URL.
 */
function getPlatformRedirectionUrl($upstream, $relativeUrl) {
    // TODO: strip query params for platforms that don't need them
    $resolved = fetchContent(makeInternalBareUrl($upstream, $relativeUrl), 1)['url'];
    $targetPath = trim(lstrip($resolved, '/', 3), '/');
    return makeInternalBareUrl(PLATFORMS_REDIRECTS[$upstream], $targetPath);
}
|
|
|
|
/**
 * Sends an HTTP POST request through the http stream wrapper.
 *
 * @param string     $url     Target address.
 * @param string     $body    Raw request body.
 * @param array|null $headers Header lines ("Name: value"), or null for none.
 * @return string|false The response body, or false on failure.
 */
function postRequest(string $url, string $body, array|null $headers=null): string|false {
    $context = stream_context_create(['http' => [
        'header' => $headers,
        'method' => 'POST',
        'content' => $body,
    ]]);
    return file_get_contents($url, false, $context);
}
|
|
|
|
/**
 * Asks the Cobalt API (COBALT_API) for a downloadable video of a page.
 *
 * @param string $url Page address submitted to the API.
 * @return string|null For a "redirect" answer whose URL contains ".mp4", that
 *                     URL; for a "tunnel" answer whose filename contains
 *                     ".mp4", a local "__cobaltproxy__" URL; otherwise null
 *                     (including request or decoding failures).
 */
function getCobaltVideo(string $url): string|null {
    $response = postRequest(COBALT_API, json_encode(['url' => $url]), [
        'Accept: application/json',
        'Content-Type: application/json',
    ]);
    if ($response === false) {
        return null; // request failed
    }
    $cobaltData = json_decode($response);
    if (!is_object($cobaltData)) {
        return null; // invalid or unexpected JSON
    }
    $status = $cobaltData->status ?? null;
    $mediaUrl = (string) ($cobaltData->url ?? '');
    if ($status === 'redirect' && strpos($mediaUrl, '.mp4')) {
        return $mediaUrl;
    }
    if ($status === 'tunnel' && strpos((string) ($cobaltData->filename ?? ''), '.mp4')) {
        // drop scheme and host so the path can be replayed through the proxy
        return SCRIPT_NAME . '__cobaltproxy__/_/' . lstrip($mediaUrl, '/', 3);
    }
    return null;
}
|
|
|
|
/**
 * Completes an item's result with media found outside the page's meta tags.
 *
 * Platforms with a PLATFORMS_API entry are read either from a JSON endpoint
 * ('url'), from the text of an element of the embed page ('id'), or - for
 * 'tag' entries - only get their description replaced by the text of the
 * first such tag. Other platforms get a video URL (Cobalt first, then any
 * ".mp4" string in the embed page) and the list of <img> sources.
 *
 * @param array $item Item holding 'result', 'meta' and 'url'; modified in place.
 */
function fetchPageMedia(array &$item): void {
    $platform = $item['result']['platform'];
    $relativeUrl = $item['result']['relativeurl'];
    if ($api = platformMapGet($platform, PLATFORMS_API)) {
        $json = null;
        if ($apiUrl = $api['url'] ?? null) {
            $json = fetchContent($apiUrl . urlLast($relativeUrl))['body'];
        } else {
            // a failed fetch yields false: normalize to '' and give up instead
            // of handing an empty document to the HTML parser
            $html = (string) fetchContent(makeMediaScrapeUrl($item))['body'];
            if ($html === '') {
                return;
            }
            $doc = htmldom($html);
            if ($id = $api['id'] ?? null) {
                // the element may be missing from the page
                $json = $doc->getElementById($id)?->textContent;
            } else if ($tag = $api['tag'] ?? null) {
                $item['result']['description'] = $doc->getElementsByTagName($tag)[0]->textContent ?? '';
                return;
            }
        }
        // The local name $data is referenced by the eval() below, don't rename.
        $data = json_decode((string) $json, true);
        $values = [];
        foreach ($api['data'] as $key => $query) {
            // NOTE(review): eval() runs accessor snippets taken from
            // PLATFORMS_API; this is only safe while that constant is trusted,
            // hard-coded configuration - never feed it external input.
            $values[$key] = eval("return \$data{$query};");
        }
        $item['result'] = array_merge($item['result'], $values);
    } else {
        $cobaltVideo = null;
        if (COBALT_API && inPlatformArray($platform, PLATFORMS_COBALT)) {
            $cobaltVideo = getCobaltVideo($item['url']);
        }
        $html = (string) fetchContent(makeMediaScrapeUrl($item))['body'];
        if (!$item['result']['video']) {
            $item['result']['video'] = $cobaltVideo ?? getAnyVideoUrl($html) ?? '';
        }
        // skip image extraction when nothing was fetched (parsing an empty
        // document would throw)
        if ($html !== '' && !inPlatformArray($platform, PLATFORMS_NOIMAGES)) {
            $item['result']['images'] = getHtmlAttributes($html, 'img', 'src');
        }
    }
}
|
|
|
|
/**
 * Asks yt-dlp for the direct media URLs of a page.
 *
 * The URL is reduced to its scheme-less form and stripped of every character
 * outside [A-Za-z0-9-_/.] before being passed to the shell (this also removes
 * any query string).
 *
 * @param string $absoluteUrl Absolute http(s) page URL.
 * @param string $options     Extra yt-dlp command-line options, inserted
 *                            verbatim: callers must pass trusted text only.
 * @return array|null The output lines of `yt-dlp -g` (a single empty string
 *                    when the command prints nothing), or null when the URL
 *                    is not absolute or is empty after sanitizing.
 */
function getWebStreamUrls(string $absoluteUrl, string $options=''): array|null {
    $url = parseAbsoluteUrl($absoluteUrl);
    if ($url) {
        $url = preg_replace('/[^A-Za-z0-9-_\/\.]/', '', $url);
    }
    if (!$url) {
        return null;
    }
    $output = shell_exec("yt-dlp {$options} -g " . escapeshellarg("https://{$url}"));
    // shell_exec() returns null/false on failure or empty output: cast so
    // trim() never receives null (deprecated since PHP 8.1)
    return explode("\n", trim((string) $output));
}
|
|
|
|
/**
 * Resolves the direct mp4 stream URL of a YouTube video.
 *
 * The video id is taken from the last 11 characters of $relativeUrl, keeping
 * only [A-Za-z0-9-_].
 *
 * @return string The first URL reported by yt-dlp, or an empty string when no
 *                id could be extracted or no URL was obtained.
 */
function getYoutubeStreamUrl(string $relativeUrl): string {
    $video = preg_replace('/[^A-Za-z0-9-_]/', '', substr($relativeUrl, -11));
    if (!$video) {
        // the original fell off the end here, violating the string return type
        return '';
    }
    return getWebStreamUrls("https://youtu.be/{$video}", '-f mp4')[0] ?? '';
}
|
|
|
|
/**
 * Streams a web video to the client as video/mp4 and terminates the script.
 *
 * The media URLs are obtained from yt-dlp and handed to ffmpeg as inputs;
 * ffmpeg copies the video stream and writes fragmented MP4 ("ismv") to the
 * output. Nothing is sent when no media URL is found.
 */
function ffmpegStream(string $absoluteUrl): void {
    $userAgent = escapeshellarg(USER_AGENT);
    $urls = getWebStreamUrls($absoluteUrl, "--user-agent {$userAgent}") ?? [];
    $inputs = '';
    foreach ($urls as $url) {
        $url = trim($url);
        if ($url === '') {
            continue; // yt-dlp printed nothing usable on this line
        }
        // the URLs come from an external tool: always shell-escape them
        $inputs .= ' -i ' . escapeshellarg($url) . ' ';
    }
    if ($inputs !== '') {
        header('Content-Type: video/mp4');
        passthru("ffmpeg -user_agent {$userAgent} {$inputs} -c:v copy -f ismv -");
    }
    die();
}
|
|
|
|
/**
 * Sends the content found at $url to the client with the given Content-Type,
 * then terminates the script.
 */
function streamFile(string $url, string $mime): void {
    header('Content-Type: ' . $mime);
    readfile($url);
    exit();
}
|
|
|
|
// TODO: redesign the endpoint names, they're kind of a mess
|
|
/**
 * Serves the special "__name__" endpoints, then terminates the script.
 *
 * @param array $segments Path segments: [0] is the endpoint wrapped in two
 *                        characters on each side (e.g. "__proxy__"), [1] the
 *                        platform, the rest the relative URL.
 *
 * Endpoints:
 *  - proxy / media: redirect to the YouTube stream; for "media" URLs ending
 *    in "/0" on other platforms, redirect to the page's video or image;
 *  - fileproxy: stream the video through this server;
 *  - cobaltproxy: stream a file from the Cobalt API;
 *  - embed: redirect to the embed URL.
 */
function handleApiRequest(array $segments): void {
    // guarded reads: a request with too few segments must not raise
    // warnings or pass null to string-typed helpers
    $api = substr($segments[0] ?? '', 2, -2);
    $platform = $segments[1] ?? '';
    $relativeUrl = implode('/', array_slice($segments, 2));
    if ($api === 'proxy' || $api === 'media') {
        if ($platform === 'youtube') {
            header('Location: ' . getYoutubeStreamUrl($relativeUrl));
        } else if ($api === 'media' && end($segments) === '0') {
            // drop the trailing "/0" index
            $relativeUrl = substr($relativeUrl, 0, -2);
            // getPageData() returns null when the page can't be fetched
            $data = getPageData($platform, $relativeUrl)['result'] ?? [];
            if ($url = (($data['video'] ?? '') ?: ($data['image'] ?? ''))) {
                header('Location: ' . $url);
            }
        }
    } else if ($api === 'fileproxy') {
        if ($platform === 'youtube') {
            streamFile(getYoutubeStreamUrl($relativeUrl), 'video/mp4');
        } else {
            ffmpegStream(makeCanonicalBareUrl($platform, lstrip($relativeUrl, '/', 3)));
        }
    } else if ($api === 'cobaltproxy') {
        streamFile(COBALT_API . $relativeUrl, 'video/mp4');
    } else if ($api === 'embed') {
        header('Location: ' . makeEmbedUrl($platform, $relativeUrl));
    }
    die();
}
|
|
|
|
/**
 * Wraps http, https, ftp and ftps URLs found in a text into HTML links
 * opening in a new tab.
 *
 * The text is inserted into the markup as-is: callers are expected to have
 * HTML-escaped it beforehand.
 */
function linkifyUrls(string $text): string {
    return preg_replace(
        // top-level domains may be longer than 3 letters (.info, .online, ...);
        // the previous {2,3} limit cut such links short
        '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,63}(\/\S*)?/',
        '<a href="$0" target="_blank" rel="noopener nofollow" title="$0">$0</a>',
        $text);
}
|