From ca83c5953727d4096ddb7af353575b23324dce80 Mon Sep 17 00:00:00 2001 From: fossterer Date: Thu, 24 Oct 2019 23:20:31 -0400 Subject: [PATCH] Fixes #3444 - Strip off HTML from podcast descriptions --- .gitignore | 2 + .../activity/OnlineFeedViewActivity.java | 16 +++----- .../antennapod/fragment/FeedInfoFragment.java | 12 +----- .../util/syndication/HtmlToPlainText.java | 40 +++++++++++++++++++ 4 files changed, 49 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index b10f948bb..9ae18e7ea 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ bin/ gen/ target/ build/ +**/*.project +**/*.classpath # Local configuration file (sdk path, etc) local.properties diff --git a/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java b/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java index 39715495a..9486c810f 100644 --- a/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java +++ b/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java @@ -367,20 +367,14 @@ public class OnlineFeedViewActivity extends AppCompatActivity { * This method is executed on a background thread */ private void beforeShowFeedInformation(Feed feed) { - final HtmlToPlainText formatter = new HtmlToPlainText(); - if(Feed.TYPE_ATOM1.equals(feed.getType()) && feed.getDescription() != null) { - // remove HTML tags from descriptions - Log.d(TAG, "Removing HTML from feed description"); - Document feedDescription = Jsoup.parse(feed.getDescription()); - feed.setDescription(StringUtils.trim(formatter.getPlainText(feedDescription))); - } + Log.d(TAG, "Removing HTML from feed description"); + + feed.setDescription(HtmlToPlainText.getPlainText(feed.getDescription())); + Log.d(TAG, "Removing HTML from shownotes"); if (feed.getItems() != null) { for (FeedItem item : feed.getItems()) { - if (item.getDescription() != null) { - Document itemDescription = Jsoup.parse(item.getDescription()); - 
item.setDescription(StringUtils.trim(formatter.getPlainText(itemDescription))); - } + item.setDescription(HtmlToPlainText.getPlainText(item.getDescription())); } } } diff --git a/app/src/main/java/de/danoeh/antennapod/fragment/FeedInfoFragment.java b/app/src/main/java/de/danoeh/antennapod/fragment/FeedInfoFragment.java index 6b270e220..3b843e150 100644 --- a/app/src/main/java/de/danoeh/antennapod/fragment/FeedInfoFragment.java +++ b/app/src/main/java/de/danoeh/antennapod/fragment/FeedInfoFragment.java @@ -167,16 +167,8 @@ public class FeedInfoFragment extends Fragment { txtvTitle.setText(feed.getTitle()); - String description = feed.getDescription(); - if(description != null) { - if(Feed.TYPE_ATOM1.equals(feed.getType())) { - HtmlToPlainText formatter = new HtmlToPlainText(); - Document feedDescription = Jsoup.parse(feed.getDescription()); - description = StringUtils.trim(formatter.getPlainText(feedDescription)); - } - } else { - description = ""; - } + String description = HtmlToPlainText.getPlainText(feed.getDescription()); + txtvDescription.setText(description); if (!TextUtils.isEmpty(feed.getAuthor())) { diff --git a/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java index 61072f1ad..3550f28c6 100644 --- a/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java +++ b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java @@ -1,12 +1,19 @@ package de.danoeh.antennapod.core.util.syndication; +import android.text.TextUtils; + +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; import org.jsoup.helper.StringUtil; +import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeVisitor; +import java.util.regex.Pattern; + /** * This class is based on 
HtmlToPlainText from jsoup's examples package. * @@ -26,6 +33,39 @@ import org.jsoup.select.NodeVisitor; */ public class HtmlToPlainText { + /** + * Use this method to strip off HTML encoding from given text + *

+ * Replaces bullet points with *, ignores colors/bold/... + * + * @param str String with any encoding + * @return Human readable text with minimal HTML formatting + */ + public static String getPlainText(String str) { + if (!TextUtils.isEmpty(str) && isHtml(str)) { + HtmlToPlainText formatter = new HtmlToPlainText(); + Document feedDescription = Jsoup.parse(str); + str = StringUtils.trim(formatter.getPlainText(feedDescription)); + } else if (TextUtils.isEmpty(str)) { + str = ""; + } + + return str; + } + + /** + * Use this method to determine if a given text has any HTML tag + * + * @param str String to be tested for presence of HTML content + * @return True if text contains any HTML tags
False if no HTML tag is found + */ + private static boolean isHtml(String str) { + final String HTML_TAG_PATTERN = "<(\"[^\"]*\"|'[^']*'|[^'\">])*>"; + Pattern htmlValidator = TextUtils.isEmpty(HTML_TAG_PATTERN) ? null : Pattern.compile(HTML_TAG_PATTERN); + + return htmlValidator.matcher(str).find(); + } + /** * Format an Element to plain-text * @param element the root element to format