pagelinks are supported, currently limited to 5 events
This commit is contained in:
parent
af504084fe
commit
626128b5dc
|
@ -1,28 +1,16 @@
|
||||||
package com.akdev.nofbeventscraper;
|
package com.akdev.nofbeventscraper;
|
||||||
|
|
||||||
import android.content.SharedPreferences;
|
|
||||||
import android.os.AsyncTask;
|
import android.os.AsyncTask;
|
||||||
|
|
||||||
import androidx.preference.PreferenceManager;
|
|
||||||
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.ref.WeakReference;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URISyntaxException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import static com.akdev.nofbeventscraper.FbEvent.createEventList;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class can asynchronously scrape public facebook events
|
* This class can asynchronously scrape public facebook events
|
||||||
|
@ -36,7 +24,7 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
|
||||||
private FbEvent event;
|
private FbEvent event;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor with WeakReference to the main activity, to update it's text fields.
|
* Constructor with reference to scraper to return results.
|
||||||
*
|
*
|
||||||
* @param scraper Reference to FbScraper
|
* @param scraper Reference to FbScraper
|
||||||
* @param input_url Input url to scrape from
|
* @param input_url Input url to scrape from
|
||||||
|
@ -162,6 +150,7 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
|
||||||
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
||||||
|
|
||||||
if (document == null) {
|
if (document == null) {
|
||||||
|
throw new IOException();
|
||||||
}
|
}
|
||||||
String json = document
|
String json = document
|
||||||
.select("script[type = application/ld+json]")
|
.select("script[type = application/ld+json]")
|
||||||
|
@ -209,7 +198,8 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* When scraping is finished, the scraper callback will receive the Event.
|
* When scraping is finished, the scraper callback will receive the event.
|
||||||
|
*
|
||||||
* @param aVoid
|
* @param aVoid
|
||||||
*/
|
*/
|
||||||
protected void onPostExecute(Void aVoid) {
|
protected void onPostExecute(Void aVoid) {
|
||||||
|
|
|
@ -0,0 +1,93 @@
|
||||||
|
package com.akdev.nofbeventscraper;
|
||||||
|
|
||||||
|
import android.os.AsyncTask;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class can asynchronously scrape public facebook pages for event ids
|
||||||
|
* It returns a String list of event urls
|
||||||
|
*/
|
||||||
|
public class FbPageScraper extends AsyncTask<Void, Void, Void> {
|
||||||
|
|
||||||
|
private FbScraper scraper;
|
||||||
|
private int error;
|
||||||
|
private String url;
|
||||||
|
private List<String> event_links = new ArrayList<String>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor with reference to scraper to return results.
|
||||||
|
*
|
||||||
|
* @param scraper Reference to FbScraper
|
||||||
|
* @param page_url Input url to scrape from
|
||||||
|
*/
|
||||||
|
FbPageScraper(FbScraper scraper, String page_url) {
|
||||||
|
|
||||||
|
this.scraper = scraper;
|
||||||
|
this.url = page_url;
|
||||||
|
this.error = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Started by execute().
|
||||||
|
* Gets the HTML doc from the input string and scrapes the event links from it.
|
||||||
|
*
|
||||||
|
* @param voids
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Void doInBackground(Void... voids) {
|
||||||
|
|
||||||
|
try {
|
||||||
|
// use default android user agent
|
||||||
|
String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
|
||||||
|
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
||||||
|
|
||||||
|
if (document == null) {
|
||||||
|
throw new IOException();
|
||||||
|
}
|
||||||
|
|
||||||
|
String regex = "(/events/[0-9]*)(/\\?event_time_id=[0-9]*)?";
|
||||||
|
|
||||||
|
List<String> event_links_href = document
|
||||||
|
.getElementsByAttributeValueMatching("href", Pattern.compile(regex))
|
||||||
|
.eachAttr("href");
|
||||||
|
|
||||||
|
for (String link : event_links_href) {
|
||||||
|
this.event_links.add("https://www.facebook.com" + link);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
this.error = R.string.error_connection;
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
this.error = R.string.error_unknown;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void onPreExecute() {
|
||||||
|
super.onPreExecute();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When scraping is finished, the scraper callback will receive the link list.
|
||||||
|
*
|
||||||
|
* @param aVoid
|
||||||
|
*/
|
||||||
|
protected void onPostExecute(Void aVoid) {
|
||||||
|
super.onPostExecute(aVoid);
|
||||||
|
|
||||||
|
this.scraper.scrapePageResultCallback(this.event_links, this.error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package com.akdev.nofbeventscraper;
|
package com.akdev.nofbeventscraper;
|
||||||
|
|
||||||
import android.content.SharedPreferences;
|
import android.content.SharedPreferences;
|
||||||
|
import android.os.AsyncTask;
|
||||||
|
|
||||||
import androidx.preference.PreferenceManager;
|
import androidx.preference.PreferenceManager;
|
||||||
|
|
||||||
|
@ -8,6 +9,7 @@ import java.lang.ref.WeakReference;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
@ -17,8 +19,9 @@ import static com.akdev.nofbeventscraper.FbEvent.createEventList;
|
||||||
public class FbScraper {
|
public class FbScraper {
|
||||||
|
|
||||||
protected List<FbEvent> events;
|
protected List<FbEvent> events;
|
||||||
|
protected List<AsyncTask> tasks;
|
||||||
|
int remaining_events = 0;
|
||||||
url_type_enum url_type = url_type_enum.EVENT;
|
url_type_enum url_type = url_type_enum.EVENT;
|
||||||
private int error;
|
|
||||||
private String input_url;
|
private String input_url;
|
||||||
private WeakReference<MainActivity> main; // no context leak with WeakReference
|
private WeakReference<MainActivity> main; // no context leak with WeakReference
|
||||||
|
|
||||||
|
@ -32,12 +35,31 @@ public class FbScraper {
|
||||||
this.main = main;
|
this.main = main;
|
||||||
this.input_url = input_url;
|
this.input_url = input_url;
|
||||||
this.events = createEventList();
|
this.events = createEventList();
|
||||||
|
this.tasks = new ArrayList<>();
|
||||||
|
|
||||||
run();
|
run();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String getPageUrl(String url) throws URISyntaxException, MalformedURLException {
|
protected String getPageUrl(String url) throws URISyntaxException, MalformedURLException {
|
||||||
throw new URISyntaxException(url, "not implemented");
|
|
||||||
|
// check for url format
|
||||||
|
new URL(url).toURI();
|
||||||
|
|
||||||
|
String regex = "(facebook.com/)(pg/)?([^/?]*)";
|
||||||
|
|
||||||
|
Pattern pattern = Pattern.compile(regex);
|
||||||
|
Matcher matcher = pattern.matcher(url);
|
||||||
|
|
||||||
|
if (matcher.find()) {
|
||||||
|
|
||||||
|
String url_prefix = "https://mbasic.facebook.com/";
|
||||||
|
String url_suffix = "?v=events";
|
||||||
|
|
||||||
|
return url_prefix + matcher.group(3) + url_suffix;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
throw new URISyntaxException(url, "Does not contain page.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -81,6 +103,7 @@ public class FbScraper {
|
||||||
|
|
||||||
void scrapeEvent(String event_url) {
|
void scrapeEvent(String event_url) {
|
||||||
FbEventScraper scraper = new FbEventScraper(this, event_url);
|
FbEventScraper scraper = new FbEventScraper(this, event_url);
|
||||||
|
tasks.add(scraper);
|
||||||
scraper.execute();
|
scraper.execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -89,31 +112,55 @@ public class FbScraper {
|
||||||
if (url_type == url_type_enum.EVENT) {
|
if (url_type == url_type_enum.EVENT) {
|
||||||
if (event != null) {
|
if (event != null) {
|
||||||
main.get().addEvent(event);
|
main.get().addEvent(event);
|
||||||
main.get().input_helper(R.string.done, false);
|
main.get().input_helper(main.get().getString(R.string.done), false);
|
||||||
} else {
|
} else {
|
||||||
main.get().input_helper(error, true);
|
main.get().input_helper(main.get().getString(error), true);
|
||||||
}
|
}
|
||||||
|
killAllTasks();
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
main.get().addEvent(event);
|
main.get().addEvent(event);
|
||||||
|
remaining_events--;
|
||||||
|
|
||||||
|
if (remaining_events <= 0) {
|
||||||
|
main.get().input_helper(main.get().getString(R.string.done), false);
|
||||||
|
killAllTasks();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* cancel vestigial async tasks
|
||||||
|
*/
|
||||||
|
void killAllTasks() {
|
||||||
|
for (AsyncTask task : tasks) {
|
||||||
|
task.cancel(true);
|
||||||
|
task = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void scrapePage(String page_url) {
|
void scrapePage(String page_url) {
|
||||||
/*
|
|
||||||
FbPageScraper scraper = new FbPageScraper(this, page_url);
|
FbPageScraper scraper = new FbPageScraper(this, page_url);
|
||||||
|
|
||||||
|
tasks.add(scraper);
|
||||||
scraper.execute();
|
scraper.execute();
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void scrapePageResultCallback(String[] event_urls, int error) {
|
protected void scrapePageResultCallback(List<String> event_urls, int error) {
|
||||||
|
|
||||||
if (event_urls != null) {
|
if (event_urls.size() > 0) {
|
||||||
|
remaining_events = event_urls.size();
|
||||||
|
main.get().input_helper(main.get().getString(R.string.found_events, event_urls.size()), false);
|
||||||
for (String event_url : event_urls) {
|
for (String event_url : event_urls) {
|
||||||
scrapeEvent(event_url);
|
try {
|
||||||
|
String url = getEventUrl(event_url);
|
||||||
|
scrapeEvent(url);
|
||||||
|
} catch (URISyntaxException | MalformedURLException e) {
|
||||||
|
// ignore this event
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (url_type == url_type_enum.PAGE) {
|
} else if (url_type == url_type_enum.PAGE) {
|
||||||
main.get().input_helper(error, true);
|
main.get().input_helper(main.get().getString(error), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -137,7 +184,7 @@ public class FbScraper {
|
||||||
|
|
||||||
} catch (URISyntaxException | MalformedURLException e) {
|
} catch (URISyntaxException | MalformedURLException e) {
|
||||||
url_type = url_type_enum.INVALID;
|
url_type = url_type_enum.INVALID;
|
||||||
main.get().input_helper(R.string.error_url, true);
|
main.get().input_helper(main.get().getString(R.string.error_url), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,6 @@ import android.view.Menu;
|
||||||
import android.view.MenuItem;
|
import android.view.MenuItem;
|
||||||
import android.view.View;
|
import android.view.View;
|
||||||
|
|
||||||
import androidx.annotation.NonNull;
|
|
||||||
import androidx.appcompat.app.AppCompatActivity;
|
import androidx.appcompat.app.AppCompatActivity;
|
||||||
import androidx.appcompat.view.menu.MenuBuilder;
|
import androidx.appcompat.view.menu.MenuBuilder;
|
||||||
import androidx.appcompat.widget.Toolbar;
|
import androidx.appcompat.widget.Toolbar;
|
||||||
|
@ -67,11 +66,11 @@ public class MainActivity extends AppCompatActivity {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Callback after clearing events from settings needed.
|
* Callback for Restoring data
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void onRestart() {
|
public void onResume() {
|
||||||
super.onRestart();
|
super.onResume();
|
||||||
|
|
||||||
events.clear();
|
events.clear();
|
||||||
events.addAll(getSavedEvents());
|
events.addAll(getSavedEvents());
|
||||||
|
@ -82,8 +81,8 @@ public class MainActivity extends AppCompatActivity {
|
||||||
* Save events list to SharedPreferences as JSON
|
* Save events list to SharedPreferences as JSON
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void onSaveInstanceState(@NonNull Bundle state) {
|
public void onPause() {
|
||||||
super.onSaveInstanceState(state);
|
super.onPause();
|
||||||
|
|
||||||
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
|
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
|
||||||
SharedPreferences.Editor prefs_edit = prefs.edit();
|
SharedPreferences.Editor prefs_edit = prefs.edit();
|
||||||
|
@ -160,7 +159,7 @@ public class MainActivity extends AppCompatActivity {
|
||||||
startScraping();
|
startScraping();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
input_helper(R.string.error_clipboard_empty, true);
|
input_helper(getString(R.string.error_clipboard_empty), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -171,9 +170,10 @@ public class MainActivity extends AppCompatActivity {
|
||||||
View.OnClickListener listener = new View.OnClickListener() {
|
View.OnClickListener listener = new View.OnClickListener() {
|
||||||
@Override
|
@Override
|
||||||
public void onClick(View view) {
|
public void onClick(View view) {
|
||||||
input_helper(R.string.helper_add_link, true);
|
input_helper(getString(R.string.helper_add_link), true);
|
||||||
edit_text_uri_input.setText(null);
|
edit_text_uri_input.setText(null);
|
||||||
input_helper(R.string.helper_add_link, false);
|
scraper.killAllTasks();
|
||||||
|
input_helper(getString(R.string.helper_add_link), false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
layout_uri_input.setErrorIconOnClickListener(listener);
|
layout_uri_input.setErrorIconOnClickListener(listener);
|
||||||
|
@ -225,9 +225,11 @@ public class MainActivity extends AppCompatActivity {
|
||||||
scraper = new FbScraper(new WeakReference<>(this), url);
|
scraper = new FbScraper(new WeakReference<>(this), url);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void input_helper(Integer resId, boolean error) {
|
public void input_helper(String str, boolean error) {
|
||||||
|
|
||||||
String str = (resId != null) ? getString(resId) : " ";
|
if (str == null) {
|
||||||
|
str = " ";
|
||||||
|
} // keep spacing
|
||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
layout_uri_input.setError(str);
|
layout_uri_input.setError(str);
|
||||||
|
|
|
@ -19,4 +19,5 @@
|
||||||
<string name="preferences_event_setting">Veranstaltungsliste löschen</string>
|
<string name="preferences_event_setting">Veranstaltungsliste löschen</string>
|
||||||
<string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string>
|
<string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string>
|
||||||
<string name="done">Fertig</string>
|
<string name="done">Fertig</string>
|
||||||
|
<string name="found_events">%1$d Veranstaltungen gefunden</string>
|
||||||
</resources>
|
</resources>
|
|
@ -31,5 +31,6 @@
|
||||||
<string name="preferences_event_snackbar">Events list cleared</string>
|
<string name="preferences_event_snackbar">Events list cleared</string>
|
||||||
<string name="event_placeholder" translatable="false">Placeholder</string>
|
<string name="event_placeholder" translatable="false">Placeholder</string>
|
||||||
<string name="done">Done</string>
|
<string name="done">Done</string>
|
||||||
|
<string name="found_events">Found %1$d events</string>
|
||||||
|
|
||||||
</resources>
|
</resources>
|
||||||
|
|
Loading…
Reference in New Issue