1
0
mirror of https://github.com/akaessens/NoFbEventScraper synced 2025-06-05 23:29:13 +02:00

much refactoring:

-move event formatting logic to event class
-disable editing of event output, it's available in the calendar app
-replace string datetimes with ZonedDateTime
-move uri checking logic to scraper
-update exception handling and error messages
-reformatting and renaming
-fix messy xml layouts
-update tests
-add comments
This commit is contained in:
akaessens
2020-08-28 14:31:45 +02:00
parent 05f3ba9a33
commit 16d390094e
8 changed files with 427 additions and 568 deletions

View File

@ -1,6 +1,5 @@
package com.akdev.nofbeventscraper;
import android.os.AsyncTask;
import android.text.Editable;
import android.text.SpannableStringBuilder;
@ -11,69 +10,100 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class FbScraper extends AsyncTask<Void, Void, Void> {
private String url;
private String error;
private MainActivity main;
private String input_str;
private WeakReference<MainActivity> main; // no context leak with WeakReference
private FbEvent event;
/**
 * Creates a scraper task for one user-supplied input string.
 *
 * NOTE(review): this span appears to hold both the removed and the added
 * constructor header of a diff hunk (one taking a MainActivity and a url, one
 * taking a WeakReference and the raw input) - confirm which one is current.
 */
FbScraper(MainActivity main, String url) {
this.url = url;
FbScraper(WeakReference<MainActivity> main, String str) {
// stored as handed in; used later to report the result or the error
this.main = main;
// raw input, validated and rewritten by fixURI in doInBackground
this.input_str = str;
}
/**
 * Validates the input as a URL and rewrites it to the mobile Facebook event page.
 *
 * <p>The input must parse as a URL and URI and must contain
 * {@code facebook.com/events/<digits>}. Everything after the numeric event id
 * (query strings, referral parameters) is dropped.
 *
 * @param str raw input expected to hold a Facebook event link
 * @return {@code https://m.facebook.com/events/<id>}
 * @throws URISyntaxException    if the input is not a valid URI or contains no event id
 * @throws MalformedURLException if the input is not a well-formed URL
 */
protected String fixURI(String str) throws URISyntaxException, MalformedURLException {
    // check for url format
    new URL(str).toURI();

    // The dot is escaped so only a literal "facebook.com" matches, and at least
    // one digit is required so a link without an event id is rejected.
    Pattern pattern = Pattern.compile("(facebook\\.com/events/[0-9]+)");
    Matcher matcher = pattern.matcher(str);

    if (!matcher.find()) {
        throw new URISyntaxException(str, "Does not contain event.");
    }
    // rewrite url to m.facebook and dismiss any query strings or referrals
    return "https://m." + matcher.group(1);
}
/**
 * Builds a one-line location string from the event's location JSON.
 *
 * Reads "name" and the nested "address" object. When the address "@type" is
 * "PostalAddress" the result is "name, streetAddress, postalCode addressLocality";
 * for any other type only the name is returned. If a JSONException is thrown,
 * the name read so far is returned (still "" when "name" itself was missing).
 *
 * NOTE(review): this span appears to contain both the removed and the added
 * lines of a diff hunk (name / location_name, two if-headers, two sets of
 * returns) - confirm which lines are current before changing the logic.
 *
 * @param location_json JSON text of the location object
 * @return the formatted location, the bare name, or ""
 */
protected String fixLocation(String location_json) {
String name = "";
String location_name = "";
try {
JSONObject reader = new JSONObject(location_json);
name = reader.getString("name");
location_name = reader.getString("name");
JSONObject address = reader.getJSONObject("address");
String type = address.getString("@type");
if (type.equals("PostalAddress"))
{
if (type.equals("PostalAddress")) {
// getString throws JSONException for a missing key, which drops to the catch below
String postal_code = address.getString("postalCode");
String address_locality = address.getString("addressLocality");
String address_country = address.getString("addressCountry");
String street_address = address.getString("streetAddress");
// included in locality
//String address_country = address.getString("addressCountry");
return name + ", " + street_address + ", " + postal_code + " " + address_locality;
}
else
{
return name;
}
return location_name + ", "
+ street_address + ", "
+ postal_code + " "
+ address_locality;
} else {
// address is not a PostalAddress: only the name is usable
return location_name;
}
} catch (JSONException e) {
e.printStackTrace();
// fall back to whatever name was read before the failure
return name;
return location_name;
}
}
/**
 * Converts the scraped date string into a date-time with offset.
 *
 * A ':' is inserted at index 22 and the result is parsed with
 * DateTimeFormatter.ISO_OFFSET_DATE_TIME. Any exception is caught, its stack
 * trace is printed, and a fallback value is returned instead of propagating.
 *
 * NOTE(review): this span appears to contain both the removed String-returning
 * fixTimezone lines and the added ZonedDateTime-returning lines of a diff hunk
 * (two signatures, two returns in each branch) - confirm which are current.
 *
 * @param time_in date string as read from the event JSON
 * @return the parsed date-time, or the fallback value when conversion fails
 */
protected String fixTimezone(String time_in) {
protected ZonedDateTime toZonedDateTime(String time_in) {
try {
// time in is missing a : in the timezone offset
Editable editable = new SpannableStringBuilder(time_in);
// assumes input shaped like 2011-12-03T10:15:30+0100, where index 22 lies
// between the hour and minute digits of the offset - TODO confirm
String time_str = editable.insert(22, ":").toString();
return editable.insert(22, ":").toString();
// parse e.g. 2011-12-03T10:15:30+01:00
return ZonedDateTime.parse(time_str, DateTimeFormatter.ISO_OFFSET_DATE_TIME);
} catch (Exception e) {
// broad catch: presumably also meant to cover input shorter than 22 characters
e.printStackTrace();
return "";
return null;
}
}
protected String fixLinks(String description_in) {
protected String fixDescriptionLinks(String description_in) {
try {
// @[152580919265:274:MagentaMusik 360] -> m.facebook.com/152580919265
/* @[152580919265:274:SiteDescription]
* to
* SiteDescription [m.facebook.com/152580919265] */
return description_in.replaceAll("@\\[([0-9]{10,}):[0-9]{3}:([^]]*)]",
"$2 [m.facebook.com/$1]");
@ -86,60 +116,45 @@ public class FbScraper extends AsyncTask<Void, Void, Void> {
/**
 * Reads one string field from a JSON object without throwing.
 *
 * NOTE(review): the duplicated catch header below looks like the removed and
 * added form of the same line from a diff hunk.
 *
 * @param reader JSON object to read from
 * @param field  key of the wanted value
 * @return the field's string value, or "" when reading it throws any exception
 */
private String readFromJson(JSONObject reader, String field) {
try {
return reader.getString(field);
}
catch (Exception e) {
} catch (Exception e) {
// also covers a null reader, since Exception includes NullPointerException
e.printStackTrace();
return "";
}
}
/**
 * Downloads the event page and fills the event field from its embedded JSON.
 *
 * Steps visible here: validate and rewrite the input with fixURI, fetch the
 * page with Jsoup, take the data of the first application/ld+json script
 * element, parse it as JSON and copy name, dates, description, location and
 * image into an FbEvent. On failure an error text is stored in the error field.
 *
 * NOTE(review): this span appears to contain both the removed and the added
 * lines of a diff hunk (two document/json/reader declarations, local event_*
 * strings next to event.* assignments, a broad catch next to typed catches) -
 * confirm which lines are current before changing the logic.
 *
 * @param voids unused
 * @return always null; results are passed on through the event and error fields
 */
@Override
protected Void doInBackground(Void... voids) {
Document document = null;
try {
document = Jsoup.connect(url).userAgent("Mozilla").get();
// throws URISyntaxException / MalformedURLException for unusable input
String url = fixURI(input_str);
// useragent needed with Jsoup > 1.12
Document document = Jsoup.connect(url).userAgent("Mozilla").get();
// NOTE(review): first() presumably returns null when no element matches the
// selector; the resulting NullPointerException is not one of the exception
// types in the typed catch clauses at the end of this method - verify.
String json = document
.select("script[type = application/ld+json]")
.first().data();
try {
String json = document.select("script[type = application/ld+json]").first().data();
JSONObject reader = new JSONObject(json);
JSONObject reader = new JSONObject(json);
// readFromJson yields "" for a missing field, so the assignments below do not throw on absent keys
event = new FbEvent();
event.url = url;
event.name = readFromJson(reader, "name");
event.start_date = toZonedDateTime(readFromJson(reader, "startDate"));
event.end_date = toZonedDateTime(readFromJson(reader, "endDate"));
event.description = fixDescriptionLinks(readFromJson(reader, "description"));
event.location = fixLocation(readFromJson(reader, "location"));
event.image_url = readFromJson(reader, "image");
String event_name = readFromJson(reader, "name");
String event_start = fixTimezone(readFromJson(reader, "startDate"));
String event_end = fixTimezone(readFromJson(reader, "endDate"));
String event_description = fixLinks(readFromJson(reader, "description"));
String location = fixLocation(readFromJson(reader, "location"));
String image_url = "";
try {
image_url = readFromJson(reader, "image"); // get from json
// get from event header
image_url = document.getElementsByClass("scaledImageFitWidth").first().attr("src");
} catch (Exception e) {
e.printStackTrace();
this.error = "Error: no image found";
}
if (event_name == null) {
this.event = null;
throw new Exception();
} else {
this.event = new FbEvent(event_name, event_start, event_end, event_description, location, image_url);
//this.event = new FbEvent("", "", "", "", "", "");
}
} catch (Exception e) {
e.printStackTrace();
this.error = "Error: Scraping event data failed";
}
} catch (Exception e) {
} catch (URISyntaxException | MalformedURLException e) {
// input rejected by fixURI
e.printStackTrace();
this.error = "Error: URL not available";
this.error = "Error: URL invalid.";
} catch (JSONException e) {
// embedded script data was not parseable JSON
e.printStackTrace();
this.error = "Error: Scraping event data failed";
} catch (IOException e) {
// page download failed
e.printStackTrace();
this.error = "Error: Unable to connect.";
}
return null;
}
@ -152,11 +167,10 @@ public class FbScraper extends AsyncTask<Void, Void, Void> {
super.onPostExecute(aVoid);
if (this.event != null) {
this.main.update(event);
}
else {
main.error(error);
this.main.clear(false);
main.get().update(event);
} else {
main.get().error(error);
main.get().clear(false);
}
}
}