From 3612345fabe380bd327fab284f401d5653c29ee1 Mon Sep 17 00:00:00 2001 From: teddit Date: Mon, 11 Apr 2022 21:30:41 +0200 Subject: [PATCH] proxy i.redd.it and v.redd.it media links in comments (#307) --- inc/commons.js | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- routes/home.js | 28 ++++++++++++++++++++++------ 2 files changed, 70 insertions(+), 7 deletions(-) diff --git a/inc/commons.js b/inc/commons.js index 09846f6..a06c3d6 100644 --- a/inc/commons.js +++ b/inc/commons.js @@ -195,7 +195,54 @@ module.exports = function(request, fs) { let instagramRegex = /(?<=href=")(https?:\/\/)(www+\.)?instagram.com(?=.+")/gm; let protocol = config.https_enabled || config.api_force_https ? 'https://' : 'http://' - + + /** + * Special handling for reddit media domains in comment hrefs. + * For example, a comment might have a direct link to an image on i.redd.it: + * <a href="https://i.redd.it/abc123.jpg">Just refer to this</a> + * We want to rewrite these hrefs, but we also need to include the media domain + * for our backend, so we know where to fetch the media from. + * After rewriting, that comment URL looks like this, for example: + * <a href="https://teddit.net/abc123.jpg?teddit_proxy=i.redd.it">Just refer to this</a> + * And then in our backend, we check whether the req query contains a + * 'teddit_proxy' parameter, and proceed to proxy if it does. 
+ */ + const replacable_media_domains = ['i.redd.it', 'v.redd.it', 'preview.redd.it'] + replacable_media_domains.forEach((domain) => { + if (str.includes(domain + "/")) { + const href_regex = new RegExp(`(?<=href=")(https?:\/\/)([A-z.]+\.)?(${domain})(.+?(?="))`, 'gm') + const hrefs = str.match(href_regex) + if (!hrefs) { + return + } + + hrefs.forEach((url) => { + let original_url = url + const valid_exts = ['png', 'jpg', 'jpeg', 'mp4', 'gif', 'gifv'] + const file_ext = getFileExtension(url) + if (valid_exts.includes(file_ext)) { + url = url.replace(domain, config.domain) + + // append the domain info to the query, for teddit backend + let u = new URL(url) + if (u.query) { + url += '&teddit_proxy=' + domain + } else { + url += '?teddit_proxy=' + domain + } + + // also replace the protocol for instances using http only + if (protocol === 'http://' && u.protocol === 'https:') { + url.replace('https://', protocol) + } + str = str.replace(original_url, url) + } + }) + } + }) + + // Continue the normal replace logic + str = str.replace(redditRegex, protocol + config.domain) if(typeof(user_preferences) == 'undefined') diff --git a/routes/home.js b/routes/home.js index 916b107..91c1fe1 100644 --- a/routes/home.js +++ b/routes/home.js @@ -18,15 +18,31 @@ homeRoute.get('/:sort?', async (req, res, next) => { let proxyable = sortby.includes('.jpg') || sortby.includes('.png') || - sortby.includes('.jpeg') + sortby.includes('.jpeg') || + sortby.includes('.mp4') || + sortby.includes('.gif') || + sortby.includes('.gifv') ? 
true : false; if (proxyable) { - let params = new URLSearchParams(req.query).toString(); - let image_url = `https://preview.redd.it/${sortby}?${params}`; - let proxied_image = await downloadAndSave(image_url); - if (proxied_image) { - return res.redirect(proxied_image); + let media_url = ''; + const replacable_media_domains = ['i.redd.it', 'v.redd.it'] + if (req.query.teddit_proxy) { + if (replacable_media_domains.includes(req.query.teddit_proxy)) { + let full_url = req.protocol + '://' + req.get('host') + req.originalUrl; + let u = new URL(full_url); + let filename = u.pathname || ''; + let query = u.search || ''; + media_url = `https://${req.query.teddit_proxy}${filename}${query}`; + } + } else { + let params = new URLSearchParams(req.query).toString(); + media_url = `https://preview.redd.it/${sortby}?${params}`; + } + + let proxied_media = await downloadAndSave(media_url); + if (proxied_media) { + return res.redirect(proxied_media); } else { return res.redirect('/'); }