UntrackMe-app-android-redir.../app/src/main/java/app/fedilab/nitterizeme/Utils.java

152 lines
5.3 KiB
Java
Raw Normal View History

2020-02-17 11:57:14 +01:00
package app.fedilab.nitterizeme;
/* Copyright 2020 Thomas Schneider
*
* This file is a part of NitterizeMe
*
* This program is free software; you can redistribute it and/or modify it under the terms of the
* GNU General Public License as published by the Free Software Foundation; either version 3 of the
* License, or (at your option) any later version.
*
* NitterizeMe is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
* Public License for more details.
*
* You should have received a copy of the GNU General Public License along with NitterizeMe; if not,
* see <http://www.gnu.org/licenses>. */
2020-02-18 12:09:18 +01:00
2020-03-29 12:17:10 +02:00
2020-02-17 11:57:14 +01:00
import java.io.IOException;
2020-02-20 11:20:30 +01:00
import java.net.InetAddress;
2020-02-17 11:57:14 +01:00
import java.net.URL;
2020-02-18 12:09:18 +01:00
import java.util.ArrayList;
import java.util.Arrays;
2020-02-17 11:57:14 +01:00
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.HttpsURLConnection;
2020-02-18 12:09:18 +01:00
import static app.fedilab.nitterizeme.MainActivity.shortener_domains;
2020-02-17 11:57:14 +01:00
class Utils {
static final Pattern urlPattern = Pattern.compile(
"(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,10}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?«»“”‘’]))",
Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
2020-03-29 12:17:10 +02:00
private static final String[] UTM_PARAMS = {
"utm_\\w+",
"ga_source",
"ga_medium",
"ga_term",
"ga_content",
"ga_campaign",
"ga_place",
"yclid",
"_openstat",
"fb_action_ids",
"fb_action_types",
"fb_source",
"fb_ref",
"fbclid",
"action_object_map",
"action_type_map",
"action_ref_map",
"gs_l",
"mkt_tok",
"hmb_campaign",
"hmb_medium",
"hmb_source",
"[\\?|&]ref[\\_]?"
2020-02-17 11:57:14 +01:00
2020-03-29 12:17:10 +02:00
};
2020-02-18 11:26:29 +01:00
/**
* Returns the unshortened URL
2020-02-18 12:25:20 +01:00
*
* @param urls ArrayList<String> URL to check
2020-02-18 11:26:29 +01:00
*/
2020-02-18 12:25:20 +01:00
static void checkUrl(ArrayList<String> urls) {
2020-02-17 11:57:14 +01:00
URL url;
2020-02-18 12:09:18 +01:00
String newURL = null;
String comingURl;
2020-02-17 11:57:14 +01:00
try {
2020-02-18 12:25:20 +01:00
comingURl = urls.get(urls.size() - 1);
2020-02-18 12:09:18 +01:00
2020-02-18 12:25:20 +01:00
if (comingURl.startsWith("http://")) {
2020-02-18 12:09:18 +01:00
comingURl = comingURl.replace("http://", "https://");
}
url = new URL(comingURl);
2020-02-17 11:57:14 +01:00
HttpsURLConnection httpsURLConnection = (HttpsURLConnection) url.openConnection();
httpsURLConnection.setRequestProperty("http.keepAlive", "false");
httpsURLConnection.setInstanceFollowRedirects(false);
httpsURLConnection.setRequestMethod("HEAD");
2020-02-18 12:25:20 +01:00
if (httpsURLConnection.getResponseCode() == 301) {
2020-02-17 11:57:14 +01:00
Map<String, List<String>> map = httpsURLConnection.getHeaderFields();
for (Map.Entry<String, List<String>> entry : map.entrySet()) {
if (entry.toString().toLowerCase().startsWith("location")) {
Matcher matcher = urlPattern.matcher(entry.toString());
if (matcher.find()) {
2020-03-29 12:17:10 +02:00
newURL = remove_tracking_param(matcher.group(1));
urls.add(newURL);
2020-02-17 11:57:14 +01:00
}
}
}
}
httpsURLConnection.getInputStream().close();
2020-02-18 12:25:20 +01:00
if (newURL != null && newURL.compareTo(comingURl) != 0) {
2020-02-18 12:09:18 +01:00
URL redirectURL = new URL(newURL);
2020-02-17 11:57:14 +01:00
String host = redirectURL.getHost();
String protocol = redirectURL.getProtocol();
2020-02-18 12:25:20 +01:00
if (protocol != null && host != null) {
if (Arrays.asList(shortener_domains).contains(host)) {
2020-02-18 12:09:18 +01:00
checkUrl(urls);
}
2020-02-17 11:57:14 +01:00
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
2020-02-20 11:20:30 +01:00
/**
* Get time for reaching a domain
2020-02-20 15:25:41 +01:00
*
2020-02-20 11:20:30 +01:00
* @param domain String domain name
* @return long delay
*/
2020-02-20 15:25:41 +01:00
static long ping(String domain) {
2020-02-20 11:20:30 +01:00
long timeDifference = -2;
try {
long beforeTime = System.currentTimeMillis();
//noinspection ResultOfMethodCallIgnored
InetAddress.getByName(domain).isReachable(10000);
long afterTime = System.currentTimeMillis();
timeDifference = afterTime - beforeTime;
2020-02-20 15:25:41 +01:00
} catch (IOException ignored) {
}
2020-02-20 11:20:30 +01:00
return timeDifference;
}
2020-03-29 12:17:10 +02:00
/**
* Clean URLs from utm parameters
*
* @param url String URL
* @return cleaned URL String
*/
private static String remove_tracking_param(String url) {
if (url != null) {
for (String utm : UTM_PARAMS) {
url = url.replaceAll("&amp;" + utm + "=[0-9a-zA-Z._-]*", "");
url = url.replaceAll("&" + utm + "=[0-9a-zA-Z._-]*", "");
url = url.replaceAll("\\?" + utm + "=[0-9a-zA-Z._-]*", "?");
}
}
if (url != null && url.endsWith("?")) {
url = url.substring(0, url.length() - 1);
}
return url;
}
2020-02-17 11:57:14 +01:00
}