From a6cc1c149f228149dcf15387a276d5cbda0490aa Mon Sep 17 00:00:00 2001 From: Thomas Date: Sat, 4 Apr 2020 09:53:49 +0200 Subject: [PATCH] comment #8 - Remove amp params in URLs --- app/src/main/java/app/fedilab/nitterizeme/Utils.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/app/fedilab/nitterizeme/Utils.java b/app/src/main/java/app/fedilab/nitterizeme/Utils.java index 31c58d5..cedc023 100644 --- a/app/src/main/java/app/fedilab/nitterizeme/Utils.java +++ b/app/src/main/java/app/fedilab/nitterizeme/Utils.java @@ -31,8 +31,9 @@ import static app.fedilab.nitterizeme.MainActivity.shortener_domains; class Utils { + private static String urlRegex = "(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,10}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))"; static final Pattern urlPattern = Pattern.compile( - "(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,10}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))", + urlRegex, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL); private static final String[] UTM_PARAMS = { "utm_\\w+", @@ -57,7 +58,8 @@ class Utils { "hmb_campaign", "hmb_medium", "hmb_source", - "[\\?|&]ref[\\_]?" + "[\\?|&]ref[\\_]?", + "amp[#\\w]+" }; @@ -141,6 +143,7 @@ class Utils { url = url.replaceAll("&" + utm + "=[0-9a-zA-Z._-]*", ""); url = url.replaceAll("&" + utm + "=[0-9a-zA-Z._-]*", ""); url = url.replaceAll("\\?" + utm + "=[0-9a-zA-Z._-]*", "?"); + url = url.replaceAll("/" + utm + "="+ urlRegex, "/"); } } if (url != null && url.endsWith("?")) {