scrape more events from pages
This commit is contained in:
parent
e549ca7676
commit
43913ccd21
|
@ -1,7 +1,10 @@
|
|||
package com.akdev.nofbeventscraper;
|
||||
|
||||
import android.content.SharedPreferences;
|
||||
import android.os.AsyncTask;
|
||||
|
||||
import androidx.preference.PreferenceManager;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
|
@ -44,32 +47,50 @@ public class FbPageScraper extends AsyncTask<Void, Void, Void> {
|
|||
@Override
|
||||
protected Void doInBackground(Void... voids) {
|
||||
|
||||
try {
|
||||
// use a generic desktop (X11/Linux) browser user agent
|
||||
String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
|
||||
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
||||
|
||||
if (document == null) {
|
||||
throw new IOException();
|
||||
do {
|
||||
try {
|
||||
// use a generic desktop (X11/Linux) browser user agent
|
||||
String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
|
||||
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
||||
|
||||
if (document == null) {
|
||||
throw new IOException();
|
||||
}
|
||||
|
||||
String regex = "(/events/[0-9]*)(/\\?event_time_id=[0-9]*)?";
|
||||
|
||||
List<String> event_links_href = document
|
||||
.getElementsByAttributeValueMatching("href", Pattern.compile(regex))
|
||||
.eachAttr("href");
|
||||
|
||||
for (String link : event_links_href) {
|
||||
this.event_links.add("https://www.facebook.com" + link);
|
||||
}
|
||||
|
||||
SharedPreferences shared_prefs = PreferenceManager
|
||||
.getDefaultSharedPreferences(scraper.main.get());
|
||||
|
||||
int max = shared_prefs.getInt("page_event_max", 5);
|
||||
|
||||
if (event_links.size() < max) {
|
||||
String next_url = document
|
||||
.getElementsByAttributeValueMatching("href", "has_more=1")
|
||||
.first().attr("href");
|
||||
|
||||
this.url = "https://mbasic.facebook.com" + next_url;
|
||||
} else {
|
||||
url = null;
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
this.error = R.string.error_connection;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
this.error = R.string.error_unknown;
|
||||
}
|
||||
|
||||
String regex = "(/events/[0-9]*)(/\\?event_time_id=[0-9]*)?";
|
||||
|
||||
List<String> event_links_href = document
|
||||
.getElementsByAttributeValueMatching("href", Pattern.compile(regex))
|
||||
.eachAttr("href");
|
||||
|
||||
for (String link : event_links_href) {
|
||||
this.event_links.add("https://www.facebook.com" + link);
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
this.error = R.string.error_connection;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
this.error = R.string.error_unknown;
|
||||
}
|
||||
} while (url != null);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ public class FbScraper {
|
|||
protected List<AsyncTask> tasks;
|
||||
url_type_enum url_type = url_type_enum.EVENT;
|
||||
private String input_url;
|
||||
private WeakReference<MainActivity> main; // no context leak with WeakReference
|
||||
protected WeakReference<MainActivity> main; // no context leak with WeakReference
|
||||
|
||||
/**
|
||||
* Constructor with WeakReference to the main activity, to add events.
|
||||
|
|
|
@ -20,4 +20,6 @@
|
|||
<string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string>
|
||||
<string name="done">Fertig</string>
|
||||
<string name="undo">Rückgängig</string>
|
||||
<string name="preferences_page_event_max_summary">Maximale Anzahl Events, die von einer einzelnen Seite geladen werden sollen.</string>
|
||||
<string name="preferences_page_event_max">Veranstaltungslimit für Seiten</string>
|
||||
</resources>
|
|
@ -30,9 +30,13 @@
|
|||
<string name="preferences_event_setting">Clear event list</string>
|
||||
<string name="preferences_event_snackbar">Events list cleared</string>
|
||||
|
||||
<string name="preferences_page_event_max_summary">Maximum amount of events scraped from a single page link.</string>
|
||||
<string name="preferences_page_event_max">Page event limit</string>
|
||||
|
||||
<!-- others -->
|
||||
<string name="event_placeholder" translatable="false">Placeholder</string>
|
||||
<string name="done">Done</string>
|
||||
<string name="undo">Undo</string>
|
||||
|
||||
|
||||
</resources>
|
||||
|
|
|
@ -16,6 +16,15 @@
|
|||
<PreferenceCategory app:title="@string/preferences_events_header">
|
||||
|
||||
|
||||
<SeekBarPreference
|
||||
android:defaultValue="5"
|
||||
app:showSeekBarValue="true"
|
||||
app:min="5"
|
||||
android:max="30"
|
||||
android:summary="@string/preferences_page_event_max_summary"
|
||||
android:key="page_event_max"
|
||||
android:title="@string/preferences_page_event_max" />
|
||||
|
||||
<Preference
|
||||
android:key="event_reset"
|
||||
android:title="@string/preferences_event_setting" />
|
||||
|
|
Loading…
Reference in New Issue