1
0
mirror of https://github.com/akaessens/NoFbEventScraper synced 2025-06-05 23:29:13 +02:00

add shortener redirection, replace m. with mbasic.

This commit is contained in:
akaessens
2020-10-03 21:45:19 +02:00
parent 2479cd9c72
commit 748cf3c074
8 changed files with 99 additions and 9 deletions

View File

@ -5,10 +5,12 @@ import android.os.AsyncTask;
import androidx.preference.PreferenceManager;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
@ -33,6 +35,28 @@ public class FbScraper {
this.tasks = new ArrayList<>();
}
/**
 * Detects a shortened Facebook link inside {@code url} and rewrites it onto the
 * mbasic subdomain.
 *
 * <p>Recognised forms are {@code fb.me/<id>}, {@code fb.me/e/<id>} and
 * {@code facebook.com/event_invite/<id>}. Only the matched part is kept: anything in
 * front of it (scheme, other subdomains) and anything after the id (further path
 * segments, query string) is dropped.
 *
 * @param url link to inspect
 * @return {@code "https://mbasic."} followed by the matched host and path
 * @throws IOException if {@code url} cannot be parsed as a URL
 * @throws URISyntaxException if {@code url} is not a valid URI, or if it contains
 *         none of the recognised shortened forms
 */
protected String getShortened(String url) throws IOException, URISyntaxException {
    // Parsed purely as a format check; the resulting URI is intentionally discarded.
    new URL(url).toURI();

    // The dots are escaped so that they only match a literal '.'. Left unescaped,
    // "fb.me/" also matched text such as "fbxme/" and unrelated links were treated
    // as shortened Facebook links.
    // NOTE(review): the match is still not anchored to the URL's host, so a link that
    // merely contains "fb.me/..." somewhere in its path or query is accepted as well.
    String regex = "(fb\\.me/)(e/)?([^/?]*)|(facebook\\.com/event_invite/[a-zA-Z0-9]*)";

    Matcher matcher = Pattern.compile(regex).matcher(url);
    if (!matcher.find()) {
        throw new URISyntaxException(url, "Does not contain page.");
    }

    // Only the mbasic site has event ids displayed in its HTML.
    String url_prefix = "https://mbasic.";
    return url_prefix + matcher.group();
}
/**
* Checks if valid URL,
* strips the facebook page id from the input link and creates a URL that can be scraped from.
@ -183,11 +207,35 @@ public class FbScraper {
}
}
/**
 * Creates an {@code FbRedirectionResolver} for this scraper and the given url and
 * executes it.
 *
 * @param url link handed to the resolver
 */
protected void redirectUrl (String url) {
    // NOTE(review): the resolver presumably reports its result back through
    // redirectionResultCallback - confirm in FbRedirectionResolver.
    new FbRedirectionResolver(this, url).execute();
}
/**
 * Stores the given url as the new input url and runs the scraper again.
 *
 * @param url replacement for the current input url; presumably the expanded link
 *            delivered by the redirection resolver - confirm against the caller
 */
protected void redirectionResultCallback(String url) {
    // swap in the expanded url, then start over from the beginning
    input_url = url;
    run();
}
/**
* Start scraping input url
*/
void run() {
// check if shortened url
try {
String shortened = getShortened(input_url);
url_type = url_type_enum.SHORT;
redirectUrl(shortened);
return;
} catch (IOException | URISyntaxException e) {
url_type = url_type_enum.INVALID;
}
// check if input url is an event
try {
String event_url = getEventUrl(input_url);
@ -212,5 +260,5 @@ public class FbScraper {
}
// enum for storing url type in this class
enum url_type_enum {EVENT, PAGE, INVALID}
// Kinds of input url this class distinguishes; the constant order is unchanged.
enum url_type_enum {
    SHORT,   // input matched a shortened link form
    EVENT,
    PAGE,
    INVALID  // input failed a url check
}
}