pagelinks are supported, currently limited to 5 events

This commit is contained in:
akaessens 2020-09-26 21:54:20 +02:00
parent af504084fe
commit 626128b5dc
6 changed files with 170 additions and 36 deletions

View File

@ -1,28 +1,16 @@
package com.akdev.nofbeventscraper; package com.akdev.nofbeventscraper;
import android.content.SharedPreferences;
import android.os.AsyncTask; import android.os.AsyncTask;
import androidx.preference.PreferenceManager;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import java.io.IOException; import java.io.IOException;
import java.lang.ref.WeakReference;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.Date; import java.util.Date;
import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.akdev.nofbeventscraper.FbEvent.createEventList;
/** /**
* This class can asynchronously scrape public facebook events * This class can asynchronously scrape public facebook events
@ -36,7 +24,7 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
private FbEvent event; private FbEvent event;
/** /**
* Constructor with WeakReference to the main activity, to update it's text fields. * Constructor with reference to scraper to return results.
* *
* @param scraper Reference to FbScraper * @param scraper Reference to FbScraper
* @param input_url Input url to scrape from * @param input_url Input url to scrape from
@ -162,6 +150,7 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
Document document = Jsoup.connect(url).userAgent(user_agent).get(); Document document = Jsoup.connect(url).userAgent(user_agent).get();
if (document == null) { if (document == null) {
throw new IOException();
} }
String json = document String json = document
.select("script[type = application/ld+json]") .select("script[type = application/ld+json]")
@ -209,7 +198,8 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
} }
/** /**
* When scraping is finished, the scraper callback will receive the Event. * When scraping is finished, the scraper callback will receive the event.
*
* @param aVoid * @param aVoid
*/ */
protected void onPostExecute(Void aVoid) { protected void onPostExecute(Void aVoid) {

View File

@ -0,0 +1,93 @@
package com.akdev.nofbeventscraper;
import android.os.AsyncTask;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Asynchronously scrapes a public facebook page for links to its events.
 * The result, a list of event urls plus an error string resource id, is
 * handed back to the owning {@link FbScraper} once the task has finished.
 */
public class FbPageScraper extends AsyncTask<Void, Void, Void> {

    // An href qualifies when it contains an event path with a numeric id,
    // optionally followed by an event_time_id query. At least one digit is
    // required so that bare "/events/" links are not collected.
    private static final Pattern EVENT_LINK_PATTERN =
            Pattern.compile("(/events/[0-9]+)(/\\?event_time_id=[0-9]*)?");

    // the scraped hrefs are appended to this host to form the event urls
    private static final String URL_PREFIX = "https://www.facebook.com";

    // generic desktop user agent sent with the page request
    private static final String USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64)";

    private final FbScraper scraper;
    private final String url;
    private final List<String> event_links = new ArrayList<>();
    // string resource id describing the failure, 0 while no error occurred
    private int error;

    /**
     * Constructor with reference to scraper to return results.
     *
     * @param scraper  Reference to FbScraper
     * @param page_url Input url to scrape from
     */
    FbPageScraper(FbScraper scraper, String page_url) {
        this.scraper = scraper;
        this.url = page_url;
        this.error = 0;
    }

    /**
     * Started by execute().
     * Gets the HTML doc from the input url and collects the event links from it.
     * Every distinct link is stored once; if none is found an error is recorded,
     * so the callback never receives an empty list together with error id 0.
     *
     * @param voids unused
     * @return always null, results are delivered in onPostExecute
     */
    @Override
    protected Void doInBackground(Void... voids) {
        try {
            Document document = Jsoup.connect(url).userAgent(USER_AGENT).get();

            if (document == null) {
                throw new IOException();
            }

            List<String> hrefs = document
                    .getElementsByAttributeValueMatching("href", EVENT_LINK_PATTERN)
                    .eachAttr("href");

            for (String href : hrefs) {
                String link = URL_PREFIX + href;
                // the same event may be linked several times on one page
                if (!this.event_links.contains(link)) {
                    this.event_links.add(link);
                }
            }

            if (this.event_links.isEmpty()) {
                // 0 is not a valid string resource id, report a real error instead
                this.error = R.string.error_unknown;
            }
        } catch (IOException e) {
            e.printStackTrace();
            this.error = R.string.error_connection;
        } catch (Exception e) {
            e.printStackTrace();
            this.error = R.string.error_unknown;
        }
        return null;
    }

    /**
     * When scraping is finished, the scraper callback will receive the link list
     * and the error resource id.
     *
     * @param aVoid unused
     */
    @Override
    protected void onPostExecute(Void aVoid) {
        super.onPostExecute(aVoid);
        this.scraper.scrapePageResultCallback(this.event_links, this.error);
    }
}

View File

@ -1,6 +1,7 @@
package com.akdev.nofbeventscraper; package com.akdev.nofbeventscraper;
import android.content.SharedPreferences; import android.content.SharedPreferences;
import android.os.AsyncTask;
import androidx.preference.PreferenceManager; import androidx.preference.PreferenceManager;
@ -8,6 +9,7 @@ import java.lang.ref.WeakReference;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -17,8 +19,9 @@ import static com.akdev.nofbeventscraper.FbEvent.createEventList;
public class FbScraper { public class FbScraper {
protected List<FbEvent> events; protected List<FbEvent> events;
protected List<AsyncTask> tasks;
int remaining_events = 0;
url_type_enum url_type = url_type_enum.EVENT; url_type_enum url_type = url_type_enum.EVENT;
private int error;
private String input_url; private String input_url;
private WeakReference<MainActivity> main; // no context leak with WeakReference private WeakReference<MainActivity> main; // no context leak with WeakReference
@ -32,12 +35,31 @@ public class FbScraper {
this.main = main; this.main = main;
this.input_url = input_url; this.input_url = input_url;
this.events = createEventList(); this.events = createEventList();
this.tasks = new ArrayList<>();
run(); run();
} }
protected String getPageUrl(String url) throws URISyntaxException, MalformedURLException { protected String getPageUrl(String url) throws URISyntaxException, MalformedURLException {
throw new URISyntaxException(url, "not implemented");
// check for url format
new URL(url).toURI();
String regex = "(facebook.com/)(pg/)?([^/?]*)";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(url);
if (matcher.find()) {
String url_prefix = "https://mbasic.facebook.com/";
String url_suffix = "?v=events";
return url_prefix + matcher.group(3) + url_suffix;
} else {
throw new URISyntaxException(url, "Does not contain page.");
}
} }
/** /**
@ -81,6 +103,7 @@ public class FbScraper {
void scrapeEvent(String event_url) { void scrapeEvent(String event_url) {
FbEventScraper scraper = new FbEventScraper(this, event_url); FbEventScraper scraper = new FbEventScraper(this, event_url);
tasks.add(scraper);
scraper.execute(); scraper.execute();
} }
@ -89,31 +112,55 @@ public class FbScraper {
if (url_type == url_type_enum.EVENT) { if (url_type == url_type_enum.EVENT) {
if (event != null) { if (event != null) {
main.get().addEvent(event); main.get().addEvent(event);
main.get().input_helper(R.string.done, false); main.get().input_helper(main.get().getString(R.string.done), false);
} else { } else {
main.get().input_helper(error, true); main.get().input_helper(main.get().getString(error), true);
} }
killAllTasks();
} else { } else {
main.get().addEvent(event); main.get().addEvent(event);
remaining_events--;
if (remaining_events <= 0) {
main.get().input_helper(main.get().getString(R.string.done), false);
killAllTasks();
}
}
}
/**
* cancel vestigial async tasks
*/
void killAllTasks() {
for (AsyncTask task : tasks) {
task.cancel(true);
task = null;
} }
} }
void scrapePage(String page_url) { void scrapePage(String page_url) {
/*
FbPageScraper scraper = new FbPageScraper(this, page_url); FbPageScraper scraper = new FbPageScraper(this, page_url);
tasks.add(scraper);
scraper.execute(); scraper.execute();
*/
} }
protected void scrapePageResultCallback(String[] event_urls, int error) { protected void scrapePageResultCallback(List<String> event_urls, int error) {
if (event_urls != null) { if (event_urls.size() > 0) {
remaining_events = event_urls.size();
main.get().input_helper(main.get().getString(R.string.found_events, event_urls.size()), false);
for (String event_url : event_urls) { for (String event_url : event_urls) {
scrapeEvent(event_url); try {
String url = getEventUrl(event_url);
scrapeEvent(url);
} catch (URISyntaxException | MalformedURLException e) {
// ignore this event
}
} }
} else if (url_type == url_type_enum.PAGE) { } else if (url_type == url_type_enum.PAGE) {
main.get().input_helper(error, true); main.get().input_helper(main.get().getString(error), true);
} }
} }
@ -137,7 +184,7 @@ public class FbScraper {
} catch (URISyntaxException | MalformedURLException e) { } catch (URISyntaxException | MalformedURLException e) {
url_type = url_type_enum.INVALID; url_type = url_type_enum.INVALID;
main.get().input_helper(R.string.error_url, true); main.get().input_helper(main.get().getString(R.string.error_url), true);
} }
} }

View File

@ -12,7 +12,6 @@ import android.view.Menu;
import android.view.MenuItem; import android.view.MenuItem;
import android.view.View; import android.view.View;
import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity; import androidx.appcompat.app.AppCompatActivity;
import androidx.appcompat.view.menu.MenuBuilder; import androidx.appcompat.view.menu.MenuBuilder;
import androidx.appcompat.widget.Toolbar; import androidx.appcompat.widget.Toolbar;
@ -67,11 +66,11 @@ public class MainActivity extends AppCompatActivity {
} }
/** /**
* Callback after clearing events from settings needed. * Callback for Restoring data
*/ */
@Override @Override
public void onRestart() { public void onResume() {
super.onRestart(); super.onResume();
events.clear(); events.clear();
events.addAll(getSavedEvents()); events.addAll(getSavedEvents());
@ -82,8 +81,8 @@ public class MainActivity extends AppCompatActivity {
* Save events list to SharedPreferences as JSON * Save events list to SharedPreferences as JSON
*/ */
@Override @Override
public void onSaveInstanceState(@NonNull Bundle state) { public void onPause() {
super.onSaveInstanceState(state); super.onPause();
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
SharedPreferences.Editor prefs_edit = prefs.edit(); SharedPreferences.Editor prefs_edit = prefs.edit();
@ -160,7 +159,7 @@ public class MainActivity extends AppCompatActivity {
startScraping(); startScraping();
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
input_helper(R.string.error_clipboard_empty, true); input_helper(getString(R.string.error_clipboard_empty), true);
} }
} }
}); });
@ -171,9 +170,10 @@ public class MainActivity extends AppCompatActivity {
View.OnClickListener listener = new View.OnClickListener() { View.OnClickListener listener = new View.OnClickListener() {
@Override @Override
public void onClick(View view) { public void onClick(View view) {
input_helper(R.string.helper_add_link, true); input_helper(getString(R.string.helper_add_link), true);
edit_text_uri_input.setText(null); edit_text_uri_input.setText(null);
input_helper(R.string.helper_add_link, false); scraper.killAllTasks();
input_helper(getString(R.string.helper_add_link), false);
} }
}; };
layout_uri_input.setErrorIconOnClickListener(listener); layout_uri_input.setErrorIconOnClickListener(listener);
@ -225,9 +225,11 @@ public class MainActivity extends AppCompatActivity {
scraper = new FbScraper(new WeakReference<>(this), url); scraper = new FbScraper(new WeakReference<>(this), url);
} }
public void input_helper(Integer resId, boolean error) { public void input_helper(String str, boolean error) {
String str = (resId != null) ? getString(resId) : " "; if (str == null) {
str = " ";
} // keep spacing
if (error) { if (error) {
layout_uri_input.setError(str); layout_uri_input.setError(str);

View File

@ -19,4 +19,5 @@
<string name="preferences_event_setting">Veranstaltungsliste löschen</string> <string name="preferences_event_setting">Veranstaltungsliste löschen</string>
<string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string> <string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string>
<string name="done">Fertig</string> <string name="done">Fertig</string>
<string name="found_events">%1$d Veranstaltungen gefunden</string>
</resources> </resources>

View File

@ -31,5 +31,6 @@
<string name="preferences_event_snackbar">Events list cleared</string> <string name="preferences_event_snackbar">Events list cleared</string>
<string name="event_placeholder" translatable="false">Placeholder</string> <string name="event_placeholder" translatable="false">Placeholder</string>
<string name="done">Done</string> <string name="done">Done</string>
<string name="found_events">Found %1$d events</string>
</resources> </resources>