Improve UTM parameter detection

This commit is contained in:
tom79 2019-04-17 16:25:42 +02:00
parent eebfc17de8
commit 8d6444b46b
2 changed files with 44 additions and 3 deletions

View File

@ -318,9 +318,7 @@ public class Status implements Parcelable{
/**
 * Stores the status content after stripping tracking query parameters.
 *
 * All cleaning is delegated to {@link Helper#remove_tracking_param(String)}, which is
 * applied to the raw {@code content} argument. The previous inline regex passes were
 * redundant: their result was overwritten by this call, and they threw a
 * NullPointerException on a null argument even though the helper accepts null.
 *
 * @param content raw status content; may be null, in which case null is stored
 */
public void setContent(String content) {
    // Remove tracking parameters by default
    this.content = Helper.remove_tracking_param(content);
}
public Status getReblog() {

View File

@ -452,6 +452,8 @@ public class Helper {
TOOT
}
/**
* Converts emojis in input to unicode
* @param input String
@ -3758,6 +3760,47 @@ public class Helper {
}
/*
 * Names of query parameters that remove_tracking_param strips from content.
 * Each entry is a regular-expression fragment, not a plain literal (see "utm_\\w+").
 * The list is taken from ClearUrls:
 * https://gitlab.com/KevinRoebert/ClearUrls/blob/master/data/data.min.json#L106
 *
 * Order is kept exactly as in the upstream list.
 */
private static final String[] UTM_PARAMS = {
        // utm_* family (any suffix)
        "utm_\\w+",
        // ga_* family
        "ga_source", "ga_medium", "ga_term", "ga_content", "ga_campaign", "ga_place",
        "yclid", "_openstat",
        // fb* family
        "fb_action_ids", "fb_action_types", "fb_source", "fb_ref", "fbclid",
        // action_*_map family
        "action_object_map", "action_type_map", "action_ref_map",
        "gs_l", "mkt_tok",
        // hmb_* family
        "hmb_campaign", "hmb_medium", "hmb_source",
        // NOTE(review): this entry already starts with its own separator class, while
        // remove_tracking_param prepends another "&" or "?" to every entry. As written it
        // can only match doubled separators (e.g. "&&ref=..."); confirm whether plain
        // "ref" / "ref_" parameters were meant to be stripped.
        "[\\?|&]ref[\\_]?"
};
/** Character class (with quantifier) accepted as the value of a tracking parameter. */
private static final String TRACKING_VALUE = "[0-9a-zA-Z._-]*";

/**
 * Matches a tracking parameter that follows another parameter: "&name=value", or the
 * HTML-escaped form "&amp;name=value", for every name listed in UTM_PARAMS.
 */
private static final java.util.regex.Pattern TRACKING_AFTER_AMPERSAND =
        java.util.regex.Pattern.compile(
                "&(?:amp;)?" + trackingNameAlternation() + "=" + TRACKING_VALUE);

/**
 * Matches a tracking parameter that opens the query string: "?name=value". The separator
 * after it ("&" or "&amp;") is consumed too, but only when it introduces another
 * "key=" pair; the lookahead keeps HTML entities such as "&gt;" intact.
 */
private static final java.util.regex.Pattern TRACKING_AFTER_QUESTION_MARK =
        java.util.regex.Pattern.compile(
                "\\?" + trackingNameAlternation() + "=" + TRACKING_VALUE
                        + "(?:&(?:amp;)?(?=[\\w.%-]+=))?");

/**
 * Removes tracking query parameters (the names listed in UTM_PARAMS) from a piece of content.
 *
 * Two passes are applied, in this order:
 * 1. every "&name=value" / "&amp;name=value" occurrence is deleted;
 * 2. every "?name=value" occurrence is replaced by "?", together with the separator that
 *    follows it when another parameter comes next, so "?utm_source=a&id=1" becomes "?id=1"
 *    instead of "?&id=1".
 * Running the "&" pass first guarantees that a chain of tracking parameters behind a leading
 * one is already gone when the "?" pass runs.
 *
 * A query string made only of tracking parameters is reduced to a bare "?", as before.
 *
 * @param original_content text to clean; may be null
 * @return the cleaned text, or null when original_content is null
 */
public static String remove_tracking_param(String original_content) {
    if (original_content == null) {
        return null;
    }
    String cleaned_content =
            TRACKING_AFTER_AMPERSAND.matcher(original_content).replaceAll("");
    return TRACKING_AFTER_QUESTION_MARK.matcher(cleaned_content).replaceAll("?");
}

/** Joins every UTM_PARAMS entry into one non-capturing regex alternation: (?:a|b|...). */
private static String trackingNameAlternation() {
    StringBuilder alternation = new StringBuilder("(?:");
    for (int i = 0; i < UTM_PARAMS.length; i++) {
        if (i > 0) {
            alternation.append('|');
        }
        alternation.append(UTM_PARAMS[i]);
    }
    return alternation.append(')').toString();
}
}