1
0
mirror of https://github.com/akaessens/NoFbEventScraper synced 2025-05-29 03:34:13 +02:00

182 lines
5.3 KiB
Java

package com.akdev.nofbeventscraper;
import android.content.SharedPreferences;
import android.os.AsyncTask;
import androidx.preference.PreferenceManager;
import java.lang.ref.WeakReference;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.akdev.nofbeventscraper.FbEvent.createEventList;
public class FbScraper {
protected List<FbEvent> events;
protected List<AsyncTask> tasks;
url_type_enum url_type = url_type_enum.EVENT;
private String input_url;
private WeakReference<MainActivity> main; // no context leak with WeakReference
/**
* Constructor with WeakReference to the main activity, to add events.
*
* @param main WeakReference of main activity to prevent context leak
* @param input_url Input url to scrape from
*/
FbScraper(WeakReference<MainActivity> main, String input_url) {
this.main = main;
this.input_url = input_url;
this.events = createEventList();
this.tasks = new ArrayList<>();
}
protected String getPageUrl(String url) throws URISyntaxException, MalformedURLException {
// check for url format
new URL(url).toURI();
String regex = "(facebook.com/)(pg/)?([^/?]*)";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(url);
if (matcher.find()) {
String url_prefix = "https://mbasic.facebook.com/";
String url_suffix = "?v=events";
return url_prefix + matcher.group(3) + url_suffix;
} else {
throw new URISyntaxException(url, "Does not contain page.");
}
}
/**
* Strips the facebook event link of the input url.
*
* @param url input url
* @return facebook event url String if one was found
* @throws URISyntaxException if event not found
* @throws MalformedURLException
*/
protected String getEventUrl(String url) throws URISyntaxException, MalformedURLException {
// check for url format
new URL(url).toURI();
String regex = "(facebook.com/events/[0-9]*)(/\\?event_time_id=[0-9]*)?";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(url);
if (matcher.find()) {
String url_prefix = "https://m.";
if (main != null) {
SharedPreferences shared_prefs = PreferenceManager.getDefaultSharedPreferences(main.get());
url_prefix = shared_prefs.getString("url_preference", url_prefix);
}
// rewrite url to m.facebook and dismiss any query strings or referrals
String ret = url_prefix + matcher.group(1);
if (matcher.group(2) != null) {
// add event time identifier
ret += matcher.group(2);
}
return ret;
} else {
throw new URISyntaxException(url, "Does not contain event.");
}
}
void scrapeEvent(String event_url) {
FbEventScraper scraper = new FbEventScraper(this, event_url);
tasks.add(scraper);
scraper.execute();
}
void scrapeEventResultCallback(FbEvent event, int error) {
if (event != null) {
main.get().addEvent(event);
main.get().input_helper(main.get().getString(R.string.done), false);
} else if (url_type == url_type_enum.EVENT) {
main.get().input_helper(main.get().getString(error), true);
}
}
/**
* cancel vestigial async tasks
*/
void killAllTasks() {
try {
for (AsyncTask task : tasks) {
task.cancel(true);
task = null;
}
} catch (Exception e) {
e.printStackTrace();
}
}
void scrapePage(String page_url) {
FbPageScraper scraper = new FbPageScraper(this, page_url);
tasks.add(scraper);
scraper.execute();
}
protected void scrapePageResultCallback(List<String> event_urls, int error) {
if (event_urls.size() > 0) {
for (String event_url : event_urls) {
try {
String url = getEventUrl(event_url);
scrapeEvent(url);
} catch (URISyntaxException | MalformedURLException e) {
// ignore this event
}
}
} else {
main.get().input_helper(main.get().getString(error), true);
}
}
void run() {
try {
String event_url = getEventUrl(input_url);
url_type = url_type_enum.EVENT;
scrapeEvent(event_url);
return;
} catch (URISyntaxException | MalformedURLException e) {
url_type = url_type_enum.INVALID;
}
try {
String page_url = getPageUrl(input_url);
url_type = url_type_enum.PAGE;
scrapePage(page_url);
} catch (URISyntaxException | MalformedURLException e) {
url_type = url_type_enum.INVALID;
main.get().input_helper(main.get().getString(R.string.error_url), true);
}
}
enum url_type_enum {EVENT, PAGE, INVALID}
}