scrape more events from pages
This commit is contained in:
parent
e549ca7676
commit
43913ccd21
|
@ -1,7 +1,10 @@
|
||||||
package com.akdev.nofbeventscraper;
|
package com.akdev.nofbeventscraper;
|
||||||
|
|
||||||
|
import android.content.SharedPreferences;
|
||||||
import android.os.AsyncTask;
|
import android.os.AsyncTask;
|
||||||
|
|
||||||
|
import androidx.preference.PreferenceManager;
|
||||||
|
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
|
||||||
|
@ -44,32 +47,50 @@ public class FbPageScraper extends AsyncTask<Void, Void, Void> {
|
||||||
@Override
|
@Override
|
||||||
protected Void doInBackground(Void... voids) {
|
protected Void doInBackground(Void... voids) {
|
||||||
|
|
||||||
try {
|
|
||||||
// use default android user agent
|
|
||||||
String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
|
|
||||||
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
|
||||||
|
|
||||||
if (document == null) {
|
do {
|
||||||
throw new IOException();
|
try {
|
||||||
|
// use default android user agent
|
||||||
|
String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
|
||||||
|
Document document = Jsoup.connect(url).userAgent(user_agent).get();
|
||||||
|
|
||||||
|
if (document == null) {
|
||||||
|
throw new IOException();
|
||||||
|
}
|
||||||
|
|
||||||
|
String regex = "(/events/[0-9]*)(/\\?event_time_id=[0-9]*)?";
|
||||||
|
|
||||||
|
List<String> event_links_href = document
|
||||||
|
.getElementsByAttributeValueMatching("href", Pattern.compile(regex))
|
||||||
|
.eachAttr("href");
|
||||||
|
|
||||||
|
for (String link : event_links_href) {
|
||||||
|
this.event_links.add("https://www.facebook.com" + link);
|
||||||
|
}
|
||||||
|
|
||||||
|
SharedPreferences shared_prefs = PreferenceManager
|
||||||
|
.getDefaultSharedPreferences(scraper.main.get());
|
||||||
|
|
||||||
|
int max = shared_prefs.getInt("page_event_max", 5);
|
||||||
|
|
||||||
|
if (event_links.size() < max) {
|
||||||
|
String next_url = document
|
||||||
|
.getElementsByAttributeValueMatching("href", "has_more=1")
|
||||||
|
.first().attr("href");
|
||||||
|
|
||||||
|
this.url = "https://mbasic.facebook.com" + next_url;
|
||||||
|
} else {
|
||||||
|
url = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
this.error = R.string.error_connection;
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
this.error = R.string.error_unknown;
|
||||||
}
|
}
|
||||||
|
} while (url != null);
|
||||||
String regex = "(/events/[0-9]*)(/\\?event_time_id=[0-9]*)?";
|
|
||||||
|
|
||||||
List<String> event_links_href = document
|
|
||||||
.getElementsByAttributeValueMatching("href", Pattern.compile(regex))
|
|
||||||
.eachAttr("href");
|
|
||||||
|
|
||||||
for (String link : event_links_href) {
|
|
||||||
this.event_links.add("https://www.facebook.com" + link);
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
this.error = R.string.error_connection;
|
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
this.error = R.string.error_unknown;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ public class FbScraper {
|
||||||
protected List<AsyncTask> tasks;
|
protected List<AsyncTask> tasks;
|
||||||
url_type_enum url_type = url_type_enum.EVENT;
|
url_type_enum url_type = url_type_enum.EVENT;
|
||||||
private String input_url;
|
private String input_url;
|
||||||
private WeakReference<MainActivity> main; // no context leak with WeakReference
|
protected WeakReference<MainActivity> main; // no context leak with WeakReference
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor with WeakReference to the main activity, to add events.
|
* Constructor with WeakReference to the main activity, to add events.
|
||||||
|
|
|
@ -20,4 +20,6 @@
|
||||||
<string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string>
|
<string name="preferences_event_snackbar">"Veranstaltungen gelöscht "</string>
|
||||||
<string name="done">Fertig</string>
|
<string name="done">Fertig</string>
|
||||||
<string name="undo">Rückgängig</string>
|
<string name="undo">Rückgängig</string>
|
||||||
|
<string name="preferences_page_event_max_summary">Maximale Anzahl Events, die von einer einzelnen Seite geladen werden sollen.</string>
|
||||||
|
<string name="preferences_page_event_max">Veranstaltungslimit für Seiten</string>
|
||||||
</resources>
|
</resources>
|
|
@ -30,9 +30,13 @@
|
||||||
<string name="preferences_event_setting">Clear event list</string>
|
<string name="preferences_event_setting">Clear event list</string>
|
||||||
<string name="preferences_event_snackbar">Events list cleared</string>
|
<string name="preferences_event_snackbar">Events list cleared</string>
|
||||||
|
|
||||||
|
<string name="preferences_page_event_max_summary">Maximum amount of events scraped from a single page link.</string>
|
||||||
|
<string name="preferences_page_event_max">Page event limit</string>
|
||||||
|
|
||||||
<!-- others -->
|
<!-- others -->
|
||||||
<string name="event_placeholder" translatable="false">Placeholder</string>
|
<string name="event_placeholder" translatable="false">Placeholder</string>
|
||||||
<string name="done">Done</string>
|
<string name="done">Done</string>
|
||||||
<string name="undo">Undo</string>
|
<string name="undo">Undo</string>
|
||||||
|
|
||||||
|
|
||||||
</resources>
|
</resources>
|
||||||
|
|
|
@ -16,6 +16,15 @@
|
||||||
<PreferenceCategory app:title="@string/preferences_events_header">
|
<PreferenceCategory app:title="@string/preferences_events_header">
|
||||||
|
|
||||||
|
|
||||||
|
<SeekBarPreference
|
||||||
|
android:defaultValue="5"
|
||||||
|
app:showSeekBarValue="true"
|
||||||
|
app:min="5"
|
||||||
|
android:max="30"
|
||||||
|
android:summary="@string/preferences_page_event_max_summary"
|
||||||
|
android:key="page_event_max"
|
||||||
|
android:title="@string/preferences_page_event_max" />
|
||||||
|
|
||||||
<Preference
|
<Preference
|
||||||
android:key="event_reset"
|
android:key="event_reset"
|
||||||
android:title="@string/preferences_event_setting" />
|
android:title="@string/preferences_event_setting" />
|
||||||
|
|
Loading…
Reference in New Issue