mirror of
https://github.com/akaessens/NoFbEventScraper
synced 2025-06-05 23:29:13 +02:00
add shortener redirection, replace m. with mbasic.
This commit is contained in:
@ -5,10 +5,12 @@ import android.os.AsyncTask;
|
||||
|
||||
import androidx.preference.PreferenceManager;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.ref.WeakReference;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
@ -33,6 +35,28 @@ public class FbScraper {
|
||||
this.tasks = new ArrayList<>();
|
||||
}
|
||||
|
||||
protected String getShortened(String url) throws IOException, URISyntaxException {
|
||||
// check for url format
|
||||
new URL(url).toURI();
|
||||
|
||||
String regex = "(fb.me/)(e/)?([^/?]*)|(facebook.com/event_invite/[a-zA-Z0-9]*)";
|
||||
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
Matcher matcher = pattern.matcher(url);
|
||||
|
||||
if (matcher.find()) {
|
||||
//only mbasic does have event ids displayed in HTML
|
||||
String url_prefix = "https://mbasic.";
|
||||
|
||||
// create URL
|
||||
return url_prefix + matcher.group();
|
||||
|
||||
} else {
|
||||
throw new URISyntaxException(url, "Does not contain page.");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if valid URL,
|
||||
* strips the facebook page id from the input link and creates a URL that can be scraped from.
|
||||
@ -183,11 +207,35 @@ public class FbScraper {
|
||||
}
|
||||
}
|
||||
|
||||
protected void redirectUrl (String url) {
|
||||
FbRedirectionResolver resolver = new FbRedirectionResolver(this, url);
|
||||
|
||||
resolver.execute();
|
||||
}
|
||||
protected void redirectionResultCallback(String url) {
|
||||
this.input_url = url;
|
||||
|
||||
// now try again with expanded url
|
||||
this.run();
|
||||
}
|
||||
|
||||
/**
|
||||
* Start scraping input url
|
||||
*/
|
||||
void run() {
|
||||
|
||||
// check if shortened url
|
||||
try {
|
||||
String shortened = getShortened(input_url);
|
||||
url_type = url_type_enum.SHORT;
|
||||
redirectUrl(shortened);
|
||||
|
||||
return;
|
||||
|
||||
} catch (IOException | URISyntaxException e) {
|
||||
url_type = url_type_enum.INVALID;
|
||||
}
|
||||
|
||||
// check if input url is an event
|
||||
try {
|
||||
String event_url = getEventUrl(input_url);
|
||||
@ -212,5 +260,5 @@ public class FbScraper {
|
||||
}
|
||||
|
||||
// enum for storing url type in this class
|
||||
enum url_type_enum {EVENT, PAGE, INVALID}
|
||||
// SHORT is assigned in run() once getShortened() accepts the input, before the redirect
// is resolved; INVALID is assigned there when that check throws. EVENT and PAGE are
// presumably set by the event/page checks later in run(); confirm against the full file.
enum url_type_enum {SHORT, EVENT, PAGE, INVALID}
|
||||
}
|
Reference in New Issue
Block a user