From ab182c4b63fd52c67692ade92cff43461f0b9587 Mon Sep 17 00:00:00 2001 From: Martin Fietz Date: Tue, 1 Nov 2016 12:01:21 +0100 Subject: [PATCH] Replace jsoup's example html to plain text parser with adaptation thereof --- .../antennapod/activity/FeedInfoActivity.java | 98 +++++++++---------- .../activity/OnlineFeedViewActivity.java | 22 ++--- .../antennapod/adapter/AdapterUtils.java | 5 +- .../util/syndication/HtmlToPlainText.java | 89 +++++++++++++++++ 4 files changed, 146 insertions(+), 68 deletions(-) create mode 100644 core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java diff --git a/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java b/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java index 3ccb94d97..19aabfc88 100644 --- a/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java +++ b/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java @@ -45,6 +45,7 @@ import de.danoeh.antennapod.core.storage.DBWriter; import de.danoeh.antennapod.core.storage.DownloadRequestException; import de.danoeh.antennapod.core.util.IntentUtils; import de.danoeh.antennapod.core.util.LangUtils; +import de.danoeh.antennapod.core.util.syndication.HtmlToPlainText; import de.danoeh.antennapod.menuhandler.FeedMenuHandler; import rx.Observable; import rx.Subscription; @@ -55,11 +56,10 @@ import rx.schedulers.Schedulers; * Displays information about a feed. 
*/ public class FeedInfoActivity extends AppCompatActivity { - private static final String TAG = "FeedInfoActivity"; - private boolean autoDeleteChanged = false; public static final String EXTRA_FEED_ID = "de.danoeh.antennapod.extra.feedId"; - + private static final String TAG = "FeedInfoActivity"; + private boolean autoDeleteChanged = false; private Feed feed; private ImageView imgvCover; @@ -82,6 +82,7 @@ public class FeedInfoActivity extends AppCompatActivity { private Subscription subscription; + private final View.OnClickListener copyUrlToClipboard = new View.OnClickListener() { @Override public void onClick(View v) { @@ -103,6 +104,40 @@ public class FeedInfoActivity extends AppCompatActivity { } }; + private boolean authInfoChanged = false; + + private TextWatcher authTextWatcher = new TextWatcher() { + @Override + public void beforeTextChanged(CharSequence s, int start, int count, int after) { + } + + @Override + public void onTextChanged(CharSequence s, int start, int before, int count) { + } + + @Override + public void afterTextChanged(Editable s) { + authInfoChanged = true; + } + }; + + private boolean filterTextChanged = false; + + private TextWatcher filterTextWatcher = new TextWatcher() { + @Override + public void beforeTextChanged(CharSequence s, int start, int count, int after) { + } + + @Override + public void onTextChanged(CharSequence s, int start, int before, int count) { + } + + @Override + public void afterTextChanged(Editable s) { + filterTextChanged = true; + } + }; + @Override protected void onCreate(Bundle savedInstanceState) { setTheme(UserPreferences.getTheme()); @@ -174,7 +209,7 @@ public class FeedInfoActivity extends AppCompatActivity { } txtvDescription.setText(description); - if (feed.getAuthor() != null) { + if (!TextUtils.isEmpty(feed.getAuthor())) { txtvAuthor.setText(feed.getAuthor()); } else { lblAuthor.setVisibility(View.GONE); @@ -265,53 +300,6 @@ public class FeedInfoActivity extends AppCompatActivity { }); } - @Override - 
public void onDestroy() { - super.onDestroy(); - if(subscription != null) { - subscription.unsubscribe(); - } - } - - - private boolean authInfoChanged = false; - - private TextWatcher authTextWatcher = new TextWatcher() { - @Override - public void beforeTextChanged(CharSequence s, int start, int count, int after) { - - } - - @Override - public void onTextChanged(CharSequence s, int start, int before, int count) { - - } - - @Override - public void afterTextChanged(Editable s) { - authInfoChanged = true; - } - }; - - private boolean filterTextChanged = false; - - private TextWatcher filterTextWatcher = new TextWatcher() { - @Override - public void beforeTextChanged(CharSequence s, int start, int count, int after) { - - } - - @Override - public void onTextChanged(CharSequence s, int start, int before, int count) { - - } - - @Override - public void afterTextChanged(Editable s) { - filterTextChanged = true; - } - }; - @Override protected void onPause() { super.onPause(); @@ -343,6 +331,14 @@ public class FeedInfoActivity extends AppCompatActivity { } } + @Override + public void onDestroy() { + super.onDestroy(); + if(subscription != null) { + subscription.unsubscribe(); + } + } + @Override public boolean onCreateOptionsMenu(Menu menu) { super.onCreateOptionsMenu(menu); diff --git a/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java b/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java index 592be3ec6..99f3bcc00 100644 --- a/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java +++ b/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java @@ -29,7 +29,6 @@ import com.bumptech.glide.Glide; import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; -import org.jsoup.examples.HtmlToPlainText; import org.jsoup.nodes.Document; import java.io.File; @@ -63,6 +62,7 @@ import de.danoeh.antennapod.core.util.FileNameGenerator; import de.danoeh.antennapod.core.util.StorageUtils; 
import de.danoeh.antennapod.core.util.URLChecker; import de.danoeh.antennapod.core.util.syndication.FeedDiscoverer; +import de.danoeh.antennapod.core.util.syndication.HtmlToPlainText; import de.danoeh.antennapod.dialog.AuthenticationDialog; import de.greenrobot.event.EventBus; import rx.Observable; @@ -81,17 +81,12 @@ import rx.schedulers.Schedulers; */ public class OnlineFeedViewActivity extends AppCompatActivity { - private static final String TAG = "OnlineFeedViewActivity"; - public static final String ARG_FEEDURL = "arg.feedurl"; - // Optional argument: specify a title for the actionbar. public static final String ARG_TITLE = "title"; - - private static final int EVENTS = EventDistributor.FEED_LIST_UPDATE; - public static final int RESULT_ERROR = 2; - + private static final String TAG = "OnlineFeedViewActivity"; + private static final int EVENTS = EventDistributor.FEED_LIST_UPDATE; private volatile List feeds; private Feed feed; private String selectedDownloadUrl; @@ -106,12 +101,6 @@ public class OnlineFeedViewActivity extends AppCompatActivity { private Subscription download; private Subscription parser; private Subscription updater; - - public void onEventMainThread(DownloadEvent event) { - Log.d(TAG, "onEventMainThread() called with: " + "event = [" + event + "]"); - setSubscribeButtonState(feed); - } - private EventDistributor.EventListener listener = new EventDistributor.EventListener() { @Override public void update(EventDistributor eventDistributor, Integer arg) { @@ -133,6 +122,11 @@ public class OnlineFeedViewActivity extends AppCompatActivity { } }; + public void onEventMainThread(DownloadEvent event) { + Log.d(TAG, "onEventMainThread() called with: " + "event = [" + event + "]"); + setSubscribeButtonState(feed); + } + @Override protected void onCreate(Bundle savedInstanceState) { setTheme(UserPreferences.getTheme()); diff --git a/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java 
b/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java index 8aaf0055a..5c58d00f2 100644 --- a/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java +++ b/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java @@ -15,7 +15,7 @@ import de.danoeh.antennapod.core.util.NetworkUtils; /** * Utility methods for adapters */ -public class AdapterUtils { +class AdapterUtils { private static final String TAG = AdapterUtils.class.getSimpleName(); @@ -26,7 +26,7 @@ public class AdapterUtils { /** * Updates the contents of the TextView that shows the current playback position and the ProgressBar. */ - public static void updateEpisodePlaybackProgress(FeedItem item, TextView txtvPos, ProgressBar episodeProgress) { + static void updateEpisodePlaybackProgress(FeedItem item, TextView txtvPos, ProgressBar episodeProgress) { FeedMedia media = item.getMedia(); episodeProgress.setVisibility(View.GONE); if (media == null) { @@ -47,7 +47,6 @@ public class AdapterUtils { - media.getPosition())); } } else if (!media.isDownloaded()) { - Log.d(TAG, "size: " + media.getSize()); if (media.getSize() > 0) { txtvPos.setText(Converter.byteToString(media.getSize())); } else if(NetworkUtils.isDownloadAllowed() && !media.checkedOnSizeButUnknown()) { diff --git a/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java new file mode 100644 index 000000000..bd40f398d --- /dev/null +++ b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java @@ -0,0 +1,89 @@ +package de.danoeh.antennapod.core.util.syndication; + +import org.jsoup.helper.StringUtil; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.nodes.TextNode; +import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; + +/** + * This class is based on HtmlToPlainText from jsoup's examples package. + * + * HTML to plain-text. 
This example program demonstrates the use of jsoup to convert HTML input to lightly-formatted + * plain-text. That is divergent from the general goal of jsoup's .text() methods, which is to get clean data from a + * scrape. + *

+ * Note that this is a fairly simplistic formatter -- for real world use you'll want to embrace and extend. + *

+ *

+ * To invoke from the command line, assuming you've downloaded the jsoup jar to your current directory:

+ *

java -cp jsoup.jar org.jsoup.examples.HtmlToPlainText url [selector]

+ * where url is the URL to fetch, and selector is an optional CSS selector. + * + * @author Jonathan Hedley, jonathan@hedley.net + * @author AntennaPod open source community + */ +public class HtmlToPlainText { + + /** + * Format an Element to plain-text + * @param element the root element to format + * @return formatted text + */ + public String getPlainText(Element element) { + FormattingVisitor formatter = new FormattingVisitor(); + NodeTraversor traversor = new NodeTraversor(formatter); + traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node + + return formatter.toString(); + } + + // the formatting rules, implemented in a depth-first DOM traverse + private class FormattingVisitor implements NodeVisitor { + + private StringBuilder accum = new StringBuilder(); // holds the accumulated text + + // hit when the node is first seen + public void head(Node node, int depth) { + String name = node.nodeName(); + if (node instanceof TextNode) { + append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. 
+ } + else if (name.equals("li")) { + append("\n * "); + } + else if (name.equals("dt")) { + append(" "); + } + else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) { + append("\n"); + } + } + + // hit when all of the node's children (if any) have been visited + public void tail(Node node, int depth) { + String name = node.nodeName(); + if (StringUtil.in(name, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5")) { + append("\n"); + } else if (name.equals("a")) { + append(String.format(" <%s>", node.absUrl("href"))); + } + } + + // appends text to the string builder, skipping a lone space that would follow existing whitespace (no word wrapping is done) + private void append(String text) { + if (text.equals(" ") && + (accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n"))) { + return; // don't accumulate long runs of empty spaces + } + + accum.append(text); + } + + @Override + public String toString() { + return accum.toString(); + } + } +}