Allow input of arbitrary page name

The document receiver will check for a 404 error if the page is invalid.
If the page is valid, just scrape it as if it were the full URI.

closes #34
This commit is contained in:
akaessens 2021-08-10 14:46:44 +02:00
parent b4d37fbc3f
commit 7fdfd38cdc
4 changed files with 55 additions and 39 deletions

View File

@ -3,19 +3,22 @@ package com.akdev.nofbeventscraper;
import android.util.Log; import android.util.Log;
import org.jsoup.Connection; import org.jsoup.Connection;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
public class DocumentReceiver { public class DocumentReceiver {
public static org.jsoup.nodes.Document getDocument(String url) { public static org.jsoup.nodes.Document getDocument(String url) throws HttpStatusException, IOException {
org.jsoup.nodes.Document document; org.jsoup.nodes.Document document;
try {
// use default android user agent // use default android user agent
String user_agent = "Mozilla/5.0 (X11; Linux x86_64)"; String user_agent = "Mozilla/5.0 (X11; Linux x86_64)";
@ -51,10 +54,6 @@ public class DocumentReceiver {
} catch (Exception ignore) { } catch (Exception ignore) {
} }
} catch (Exception e) {
e.printStackTrace();
return null;
}
return document; return document;
} }
} }

View File

@ -5,6 +5,7 @@ import android.util.Log;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.jsoup.HttpStatusException;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import java.io.IOException; import java.io.IOException;
@ -146,9 +147,8 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
Log.d("scraperLog", "doInBackground: "+url); Log.d("scraperLog", "doInBackground: "+url);
Document document = DocumentReceiver.getDocument(url);
try { try {
Document document = DocumentReceiver.getDocument(url);
if (document == null) { if (document == null) {
throw new IOException(); throw new IOException();
} }
@ -191,7 +191,10 @@ public class FbEventScraper extends AsyncTask<Void, Void, Void> {
this.event = new FbEvent(url, name, start_date, end_date, description, location, image_url); this.event = new FbEvent(url, name, start_date, end_date, description, location, image_url);
} catch (IOException e) { } catch (HttpStatusException e) {
this.error = R.string.error_url;
}
catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
this.error = R.string.error_connection; this.error = R.string.error_connection;
} catch (Exception e) { } catch (Exception e) {

View File

@ -5,6 +5,7 @@ import android.os.AsyncTask;
import androidx.preference.PreferenceManager; import androidx.preference.PreferenceManager;
import org.jsoup.HttpStatusException;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import java.io.IOException; import java.io.IOException;
@ -95,7 +96,9 @@ public class FbPageScraper extends AsyncTask<Void, Void, Void> {
url = null; url = null;
event_links = event_links.subList(0, max); event_links = event_links.subList(0, max);
} }
} catch (HttpStatusException e) {
this.error = R.string.error_url;
return null;
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
this.error = R.string.error_connection; this.error = R.string.error_connection;

View File

@ -264,6 +264,17 @@ public class FbScraper {
url_type = url_type_enum.PAGE; url_type = url_type_enum.PAGE;
scrapePage(page_url); scrapePage(page_url);
return;
} catch (URISyntaxException | MalformedURLException e) {
url_type = url_type_enum.INVALID;
}
// check if only page name without prefix
try {
String page_url = getPageUrl("https://mbasic.facebook.com/"+input_url);
url_type = url_type_enum.PAGE;
scrapePage(page_url);
} catch (URISyntaxException | MalformedURLException e) { } catch (URISyntaxException | MalformedURLException e) {
url_type = url_type_enum.INVALID; url_type = url_type_enum.INVALID;
main.get().input_helper(main.get().getString(R.string.error_url), true); main.get().input_helper(main.get().getString(R.string.error_url), true);