pagelinks are supported, currently limited to 5 events
This commit is contained in:
parent
af504084fe
commit
626128b5dc
|
@ -1,28 +1,16 @@
|
|||
package com.akdev.nofbeventscraper;
|
||||
|
||||
import android.content.SharedPreferences;
|
||||
import android.os.AsyncTask;
|
||||
|
||||
import androidx.preference.PreferenceManager;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.ref.WeakReference;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static com.akdev.nofbeventscraper.FbEvent.createEventList;
|
||||
|
||||
/**
|
||||
* This class can asynchronously scrape public facebook events
|
||||
|
@ -36,7 +24,7 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
|
|||
private FbEvent event;
|
||||
|
||||
/**
|
||||
* Constructor with WeakReference to the main activity, to update its text fields.
|
||||
* Constructor with reference to scraper to return results.
|
||||
*
|
||||
* @param scraper Reference to FbScraper
|
||||
* @param input_url Input url to scrape from
|
||||
|
@ -162,6 +150,7 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
|
|||
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
||||
|
||||
if (document == null) {
|
||||
throw new IOException();
|
||||
}
|
||||
String json = document
|
||||
.select("script[type = application/ld+json]")
|
||||
|
@ -209,7 +198,8 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
|
|||
}
|
||||
|
||||
/**
|
||||
* When scraping is finished, the scraper callback will receive the Event.
|
||||
* When scraping is finished, the scraper callback will receive the event.
|
||||
*
|
||||
* @param aVoid
|
||||
*/
|
||||
protected void onPostExecute(Void aVoid) {
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
package com.akdev.nofbeventscraper;
|
||||
|
||||
import android.os.AsyncTask;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* This class can asynchronously scrape public facebook pages for event ids
|
||||
* It returns a String list of event urls
|
||||
*/
|
||||
public class FbPageScraper extends AsyncTask<Void, Void, Void> {
|
||||
|
||||
private FbScraper scraper;
|
||||
private int error;
|
||||
private String url;
|
||||
private List<String> event_links = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* Constructor with reference to scraper to return results.
|
||||
*
|
||||
* @param scraper Reference to FbScraper
|
||||
* @param page_url Input url to scrape from
|
||||
*/
|
||||
FbPageScraper(FbScraper scraper, String page_url) {
|
||||
|
||||
this.scraper = scraper;
|
||||
this.url = page_url;
|
||||
this.error = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Started by execute().
|
||||
* Gets the HTML doc from the input string and scrapes the event links from it.
|
||||
*
|
||||
* @param voids
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
protected Void doInBackground(Void... voids) {
|
||||
|
||||
try {
|
||||
// use default android user agent
|
||||
String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
|
||||
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
||||
|
||||
if (document == null) {
|
||||
throw new IOException();
|
||||
}
|
||||
|
||||
String regex = "(/events/[0-9]*)(/\\?event_time_id=[0-9]*)?";
|
||||
|
||||
List<String> event_links_href = document
|
||||
.getElementsByAttributeValueMatching("href", Pattern.compile(regex))
|
||||
.eachAttr("href");
|
||||
|
||||
for (String link : event_links_href) {
|
||||
this.event_links.add("https://www.facebook.com" + link);
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
this.error = R.string.error_connection;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
this.error = R.string.error_unknown;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onPreExecute() {
|
||||
super.onPreExecute();
|
||||
}
|
||||
|
||||
/**
|
||||
* When scraping is finished, the scraper callback will receive the link list.
|
||||
*
|
||||
* @param aVoid
|
||||
*/
|
||||
protected void onPostExecute(Void aVoid) {
|
||||
super.onPostExecute(aVoid);
|
||||
|
||||
this.scraper.scrapePageResultCallback(this.event_links, this.error);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package com.akdev.nofbeventscraper;
|
||||
|
||||
import android.content.SharedPreferences;
|
||||
import android.os.AsyncTask;
|
||||
|
||||
import androidx.preference.PreferenceManager;
|
||||
|
||||
|
@ -8,6 +9,7 @@ import java.lang.ref.WeakReference;
|
|||
import java.net.MalformedURLException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -17,8 +19,9 @@ import static com.akdev.nofbeventscraper.FbEvent.createEventList;
|
|||
public class FbScraper {
|
||||
|
||||
protected List<FbEvent> events;
|
||||
protected List<AsyncTask> tasks;
|
||||
int remaining_events = 0;
|
||||
url_type_enum url_type = url_type_enum.EVENT;
|
||||
private int error;
|
||||
private String input_url;
|
||||
private WeakReference<MainActivity> main; // no context leak with WeakReference
|
||||
|
||||
|
@ -32,12 +35,31 @@ public class FbScraper {
|
|||
this.main = main;
|
||||
this.input_url = input_url;
|
||||
this.events = createEventList();
|
||||
this.tasks = new ArrayList<>();
|
||||
|
||||
run();
|
||||
}
|
||||
|
||||
protected String getPageUrl(String url) throws URISyntaxException, MalformedURLException {
|
||||
throw new URISyntaxException(url, "not implemented");
|
||||
|
||||
// check for url format
|
||||
new URL(url).toURI();
|
||||
|
||||
String regex = "(facebook.com/)(pg/)?([^/?]*)";
|
||||
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
Matcher matcher = pattern.matcher(url);
|
||||
|
||||
if (matcher.find()) {
|
||||
|
||||
String url_prefix = "https://mbasic.facebook.com/";
|
||||
String url_suffix = "?v=events";
|
||||
|
||||
return url_prefix + matcher.group(3) + url_suffix;
|
||||
|
||||
} else {
|
||||
throw new URISyntaxException(url, "Does not contain page.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -81,6 +103,7 @@ public class FbScraper {
|
|||
|
||||
void scrapeEvent(String event_url) {
|
||||
FbEventScraper scraper = new FbEventScraper(this, event_url);
|
||||
tasks.add(scraper);
|
||||
scraper.execute();
|
||||
}
|
||||
|
||||
|
@ -89,31 +112,55 @@ public class FbScraper {
|
|||
if (url_type == url_type_enum.EVENT) {
|
||||
if (event != null) {
|
||||
main.get().addEvent(event);
|
||||
main.get().input_helper(R.string.done, false);
|
||||
main.get().input_helper(main.get().getString(R.string.done), false);
|
||||
} else {
|
||||
main.get().input_helper(error, true);
|
||||
main.get().input_helper(main.get().getString(error), true);
|
||||
}
|
||||
killAllTasks();
|
||||
|
||||
} else {
|
||||
main.get().addEvent(event);
|
||||
remaining_events--;
|
||||
|
||||
if (remaining_events <= 0) {
|
||||
main.get().input_helper(main.get().getString(R.string.done), false);
|
||||
killAllTasks();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* cancel vestigial async tasks
|
||||
*/
|
||||
void killAllTasks() {
|
||||
for (AsyncTask task : tasks) {
|
||||
task.cancel(true);
|
||||
task = null;
|
||||
}
|
||||
}
|
||||
|
||||
void scrapePage(String page_url) {
|
||||
/*
|
||||
FbPageScraper scraper = new FbPageScraper(this, page_url);
|
||||
|
||||
tasks.add(scraper);
|
||||
scraper.execute();
|
||||
*/
|
||||
}
|
||||
|
||||
protected void scrapePageResultCallback(String[] event_urls, int error) {
|
||||
protected void scrapePageResultCallback(List<String> event_urls, int error) {
|
||||
|
||||
if (event_urls != null) {
|
||||
if (event_urls.size() > 0) {
|
||||
remaining_events = event_urls.size();
|
||||
main.get().input_helper(main.get().getString(R.string.found_events, event_urls.size()), false);
|
||||
for (String event_url : event_urls) {
|
||||
scrapeEvent(event_url);
|
||||
try {
|
||||
String url = getEventUrl(event_url);
|
||||
scrapeEvent(url);
|
||||
} catch (URISyntaxException | MalformedURLException e) {
|
||||
// ignore this event
|
||||
}
|
||||
}
|
||||
} else if (url_type == url_type_enum.PAGE) {
|
||||
main.get().input_helper(error, true);
|
||||
main.get().input_helper(main.get().getString(error), true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -137,7 +184,7 @@ public class FbScraper {
|
|||
|
||||
} catch (URISyntaxException | MalformedURLException e) {
|
||||
url_type = url_type_enum.INVALID;
|
||||
main.get().input_helper(R.string.error_url, true);
|
||||
main.get().input_helper(main.get().getString(R.string.error_url), true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -12,7 +12,6 @@ import android.view.Menu;
|
|||
import android.view.MenuItem;
|
||||
import android.view.View;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.appcompat.app.AppCompatActivity;
|
||||
import androidx.appcompat.view.menu.MenuBuilder;
|
||||
import androidx.appcompat.widget.Toolbar;
|
||||
|
@ -67,11 +66,11 @@ public class MainActivity extends AppCompatActivity {
|
|||
}
|
||||
|
||||
/**
|
||||
* Callback after clearing events from settings needed.
|
||||
* Callback for Restoring data
|
||||
*/
|
||||
@Override
|
||||
public void onRestart() {
|
||||
super.onRestart();
|
||||
public void onResume() {
|
||||
super.onResume();
|
||||
|
||||
events.clear();
|
||||
events.addAll(getSavedEvents());
|
||||
|
@ -82,8 +81,8 @@ public class MainActivity extends AppCompatActivity {
|
|||
* Save events list to SharedPreferences as JSON
|
||||
*/
|
||||
@Override
|
||||
public void onSaveInstanceState(@NonNull Bundle state) {
|
||||
super.onSaveInstanceState(state);
|
||||
public void onPause() {
|
||||
super.onPause();
|
||||
|
||||
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
|
||||
SharedPreferences.Editor prefs_edit = prefs.edit();
|
||||
|
@ -160,7 +159,7 @@ public class MainActivity extends AppCompatActivity {
|
|||
startScraping();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
input_helper(R.string.error_clipboard_empty, true);
|
||||
input_helper(getString(R.string.error_clipboard_empty), true);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
@ -171,9 +170,10 @@ public class MainActivity extends AppCompatActivity {
|
|||
View.OnClickListener listener = new View.OnClickListener() {
|
||||
@Override
|
||||
public void onClick(View view) {
|
||||
input_helper(R.string.helper_add_link, true);
|
||||
input_helper(getString(R.string.helper_add_link), true);
|
||||
edit_text_uri_input.setText(null);
|
||||
input_helper(R.string.helper_add_link, false);
|
||||
scraper.killAllTasks();
|
||||
input_helper(getString(R.string.helper_add_link), false);
|
||||
}
|
||||
};
|
||||
layout_uri_input.setErrorIconOnClickListener(listener);
|
||||
|
@ -225,9 +225,11 @@ public class MainActivity extends AppCompatActivity {
|
|||
scraper = new FbScraper(new WeakReference<>(this), url);
|
||||
}
|
||||
|
||||
public void input_helper(Integer resId, boolean error) {
|
||||
public void input_helper(String str, boolean error) {
|
||||
|
||||
String str = (resId != null) ? getString(resId) : " ";
|
||||
if (str == null) {
|
||||
str = " ";
|
||||
} // keep spacing
|
||||
|
||||
if (error) {
|
||||
layout_uri_input.setError(str);
|
||||
|
|
|
@ -19,4 +19,5 @@
|
|||
<string name="preferences_event_setting">Veranstaltungsliste löschen</string>
|
||||
<string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string>
|
||||
<string name="done">Fertig</string>
|
||||
<string name="found_events">%1$d Veranstaltungen gefunden</string>
|
||||
</resources>
|
|
@ -31,5 +31,6 @@
|
|||
<string name="preferences_event_snackbar">Events list cleared</string>
|
||||
<string name="event_placeholder" translatable="false">Placeholder</string>
|
||||
<string name="done">Done</string>
|
||||
<string name="found_events">Found %1$d events</string>
|
||||
|
||||
</resources>
|
||||
|
|
Loading…
Reference in New Issue