Fix bug where the cookies need to be accepted for mbasic scraping
parent 2efaafa38b
commit 08c1040679
@@ -0,0 +1,53 @@
+package com.akdev.nofbeventscraper;
+
+import org.jsoup.Connection;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Element;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class DocumentReceiver {
+
+    public static org.jsoup.nodes.Document getDocument(String url) {
+
+        org.jsoup.nodes.Document document;
+
+        try {
+            // use default android user agent
+            String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
+
+            Connection connection = Jsoup.connect(url).userAgent(user_agent).followRedirects(true);
+
+            Connection.Response response = connection.execute();
+
+            document = response.parse();
+
+            try {
+                // accept cookies needed?
+                Element form = document.select("form[method=post]").first();
+                String action = form.attr("action");
+
+                List<String> names = form.select("input").eachAttr("name");
+                List<String> values = form.select("input").eachAttr("value");
+
+                Map<String, String> data = new HashMap<String, String>();
+
+                for (int i = 0; i < names.size(); i++) {
+                    data.put(names.get(i), values.get(i));
+                }
+
+                document = connection.url("https://mbasic.facebook.com" + action)
+                        .cookies(response.cookies())
+                        .method(Connection.Method.POST)
+                        .data(data)
+                        .post();
+
+            } catch (Exception ignore) {
+            }
+        } catch (Exception e) {
+            return null;
+        }
+        return document;
+    }
+}
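
For context, a minimal caller sketch (not part of the commit) showing how the new helper is meant to be used; the FbEventScraper and FbPageScraper hunks below switch to exactly this pattern. The class name UsageExample and the event URL are illustrative placeholders.

package com.akdev.nofbeventscraper;

import org.jsoup.nodes.Document;

public class UsageExample {

    public static void main(String[] args) {
        // Illustrative placeholder URL; any mbasic.facebook.com page is handled the same way.
        String url = "https://mbasic.facebook.com/events/1234567890";

        // getDocument() follows redirects, submits the cookie-consent form when one is
        // served instead of the requested page, and returns the parsed document,
        // or null if the request or parsing fails.
        Document document = DocumentReceiver.getDocument(url);

        if (document == null) {
            // The scrapers translate null into an IOException; a standalone caller
            // should also treat it as a failed fetch.
            System.err.println("Could not load " + url);
            return;
        }

        System.out.println(document.title());
    }
}
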
@@ -4,7 +4,6 @@ import android.os.AsyncTask;
 
 import org.json.JSONException;
 import org.json.JSONObject;
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 
 import java.io.IOException;
@@ -144,11 +143,9 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
     @Override
     protected Void doInBackground(Void... voids) {
 
         try {
-            // use default android user agent
-            String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
-            Document document = Jsoup.connect(url).userAgent(user_agent).get();
+            Document document = DocumentReceiver.getDocument(url);
 
             try {
                 if (document == null) {
                     throw new IOException();
                 }
 
@@ -5,7 +5,6 @@ import android.os.AsyncTask;
 
 import androidx.preference.PreferenceManager;
 
-import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 
 import java.io.IOException;
@@ -51,8 +50,8 @@ public class FbPageScraper extends AsyncTask<Void, Void, Void> {
         do {
             try {
-                // use default android user agent
-                String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
-                Document document = Jsoup.connect(url).userAgent(user_agent).get();
+
+                Document document = DocumentReceiver.getDocument(url);
+
                 if (document == null) {
                     throw new IOException();
@@ -80,7 +79,7 @@ public class FbPageScraper extends AsyncTask<Void, Void, Void> {
             int max = shared_prefs.getInt("page_event_max", 5);
 
             if (event_links.size() < max) {
-                // find "next page
+                // find next page
                 try {
                     String next_url = document
                             .getElementsByAttributeValueMatching("href", "has_more=1")