Allow input of arbitrary page name

DocumentReceiver checks for a 404 error if the page name is invalid. If the page is valid, it is scraped as if the full URI had been entered. Closes #34
parent b4d37fbc3f
commit 7fdfd38cdc
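For context: jsoup's Connection.get() throws HttpStatusException (a subclass of IOException) for non-2xx responses by default, which is what the 404 check in this commit relies on. A minimal, self-contained sketch of that behavior; the page name, class name, and output strings are illustrative, not taken from the app:

import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;

public class FetchSketch {
    public static void main(String[] args) {
        try {
            // jsoup throws HttpStatusException for HTTP error statuses by default,
            // so a nonexistent page name surfaces here as a 404.
            Document doc = Jsoup.connect("https://mbasic.facebook.com/somepagename")
                    .userAgent("Mozilla/5.0 (X11; Linux x86_64)")
                    .get();
            System.out.println("valid page: " + doc.title());
        } catch (HttpStatusException e) {
            // Invalid page: the HTTP status code (e.g. 404) is available here.
            System.out.println("invalid page, status " + e.getStatusCode());
        } catch (IOException e) {
            // Network-level failure, unrelated to the page name.
            e.printStackTrace();
        }
    }
}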
DocumentReceiver.java

@@ -3,19 +3,22 @@ package com.akdev.nofbeventscraper;
 import android.util.Log;
 
 import org.jsoup.Connection;
+import org.jsoup.HttpStatusException;
 import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 public class DocumentReceiver {
 
-    public static org.jsoup.nodes.Document getDocument(String url) {
+    public static org.jsoup.nodes.Document getDocument(String url) throws HttpStatusException, IOException {
 
         org.jsoup.nodes.Document document;
 
-        try {
         // use default android user agent
         String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
@@ -51,10 +54,6 @@ public class DocumentReceiver {
             } catch (Exception ignore) {
             }
-        } catch (Exception e) {
-            e.printStackTrace();
-            return null;
-        }
         return document;
     }
 }
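With the outer try/catch removed, getDocument() now declares throws HttpStatusException, IOException and propagates failures to its callers instead of returning null, so an invalid page (404) can be told apart from a plain connection problem.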
FbEventScraper.java

@@ -5,6 +5,7 @@ import android.util.Log;
 import org.json.JSONException;
 import org.json.JSONObject;
+import org.jsoup.HttpStatusException;
 import org.jsoup.nodes.Document;
 
 import java.io.IOException;
@@ -146,9 +147,8 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
         Log.d("scraperLog", "doInBackground: "+url);
 
-        Document document = DocumentReceiver.getDocument(url);
-
         try {
+            Document document = DocumentReceiver.getDocument(url);
             if (document == null) {
                 throw new IOException();
             }
@@ -191,7 +191,10 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
             this.event = new FbEvent(url, name, start_date, end_date, description, location, image_url);
 
-        } catch (IOException e) {
+        } catch (HttpStatusException e) {
+            this.error = R.string.error_url;
+        }
+        catch (IOException e) {
             e.printStackTrace();
             this.error = R.string.error_connection;
         } catch (Exception e) {
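Note that HttpStatusException extends IOException, so the more specific catch must come first; otherwise a 404 from an invalid page name would be reported as error_connection instead of error_url.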
FbPageScraper.java

@@ -5,6 +5,7 @@ import android.os.AsyncTask;
 import androidx.preference.PreferenceManager;
 
+import org.jsoup.HttpStatusException;
 import org.jsoup.nodes.Document;
 
 import java.io.IOException;
@@ -95,7 +96,9 @@ public class FbPageScraper extends AsyncTask<Void, Void, Void> {
                     url = null;
                     event_links = event_links.subList(0, max);
                 }
+            } catch (HttpStatusException e) {
+                this.error = R.string.error_url;
+                return null;
             } catch (IOException e) {
                 e.printStackTrace();
                 this.error = R.string.error_connection;
FbScraper.java

@@ -264,6 +264,17 @@ public class FbScraper {
                 url_type = url_type_enum.PAGE;
                 scrapePage(page_url);
 
+                return;
+
+            } catch (URISyntaxException | MalformedURLException e) {
+                url_type = url_type_enum.INVALID;
+            }
+            // check if only page name without prefix
+            try {
+                String page_url = getPageUrl("https://mbasic.facebook.com/"+input_url);
+                url_type = url_type_enum.PAGE;
+                scrapePage(page_url);
+
             } catch (URISyntaxException | MalformedURLException e) {
                 url_type = url_type_enum.INVALID;
                 main.get().input_helper(main.get().getString(R.string.error_url), true);
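Putting the FbScraper change together: the input is first parsed as a full URL, and only if that fails is it retried as a bare page name under mbasic.facebook.com. A simplified, self-contained sketch of that fallback flow; parsePageUrl, scrapePage, reportError, and the class name are illustrative stand-ins for the app's real helpers:

import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;

public class PageInputSketch {

    // Stand-in for FbScraper's URL parsing: validates the input as a URL.
    static String parsePageUrl(String input) throws URISyntaxException, MalformedURLException {
        return new URL(input).toURI().toString();
    }

    static void scrapePage(String page_url) {
        System.out.println("scraping " + page_url);
    }

    static void reportError() {
        System.out.println("error: not a valid URL or page name");
    }

    static void resolveInput(String input_url) {
        try {
            // First interpretation: the input is already a full page URL.
            scrapePage(parsePageUrl(input_url));
            return;
        } catch (URISyntaxException | MalformedURLException e) {
            // Not a URL; fall through and try it as a bare page name.
        }
        try {
            // Second interpretation: prefix the page name, as the commit does.
            scrapePage(parsePageUrl("https://mbasic.facebook.com/" + input_url));
        } catch (URISyntaxException | MalformedURLException e) {
            reportError();
        }
    }

    public static void main(String[] args) {
        resolveInput("somepagename");
    }
}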