Fixes #3444 - Strip off HTML from podcast descriptions
This commit is contained in:
parent
437f3f29c0
commit
ca83c59537
|
@ -13,6 +13,8 @@ bin/
|
||||||
gen/
|
gen/
|
||||||
target/
|
target/
|
||||||
build/
|
build/
|
||||||
|
**/*.project
|
||||||
|
**/*.classpath
|
||||||
|
|
||||||
# Local configuration file (sdk path, etc)
|
# Local configuration file (sdk path, etc)
|
||||||
local.properties
|
local.properties
|
||||||
|
|
|
@ -367,20 +367,14 @@ public class OnlineFeedViewActivity extends AppCompatActivity {
|
||||||
* This method is executed on a background thread
|
* This method is executed on a background thread
|
||||||
*/
|
*/
|
||||||
private void beforeShowFeedInformation(Feed feed) {
|
private void beforeShowFeedInformation(Feed feed) {
|
||||||
final HtmlToPlainText formatter = new HtmlToPlainText();
|
|
||||||
if(Feed.TYPE_ATOM1.equals(feed.getType()) && feed.getDescription() != null) {
|
|
||||||
// remove HTML tags from descriptions
|
|
||||||
Log.d(TAG, "Removing HTML from feed description");
|
Log.d(TAG, "Removing HTML from feed description");
|
||||||
Document feedDescription = Jsoup.parse(feed.getDescription());
|
|
||||||
feed.setDescription(StringUtils.trim(formatter.getPlainText(feedDescription)));
|
feed.setDescription(HtmlToPlainText.getPlainText(feed.getDescription()));
|
||||||
}
|
|
||||||
Log.d(TAG, "Removing HTML from shownotes");
|
Log.d(TAG, "Removing HTML from shownotes");
|
||||||
if (feed.getItems() != null) {
|
if (feed.getItems() != null) {
|
||||||
for (FeedItem item : feed.getItems()) {
|
for (FeedItem item : feed.getItems()) {
|
||||||
if (item.getDescription() != null) {
|
item.setDescription(HtmlToPlainText.getPlainText(item.getDescription()));
|
||||||
Document itemDescription = Jsoup.parse(item.getDescription());
|
|
||||||
item.setDescription(StringUtils.trim(formatter.getPlainText(itemDescription)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,16 +167,8 @@ public class FeedInfoFragment extends Fragment {
|
||||||
|
|
||||||
txtvTitle.setText(feed.getTitle());
|
txtvTitle.setText(feed.getTitle());
|
||||||
|
|
||||||
String description = feed.getDescription();
|
String description = HtmlToPlainText.getPlainText(feed.getDescription());
|
||||||
if(description != null) {
|
|
||||||
if(Feed.TYPE_ATOM1.equals(feed.getType())) {
|
|
||||||
HtmlToPlainText formatter = new HtmlToPlainText();
|
|
||||||
Document feedDescription = Jsoup.parse(feed.getDescription());
|
|
||||||
description = StringUtils.trim(formatter.getPlainText(feedDescription));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
description = "";
|
|
||||||
}
|
|
||||||
txtvDescription.setText(description);
|
txtvDescription.setText(description);
|
||||||
|
|
||||||
if (!TextUtils.isEmpty(feed.getAuthor())) {
|
if (!TextUtils.isEmpty(feed.getAuthor())) {
|
||||||
|
|
|
@ -1,12 +1,19 @@
|
||||||
package de.danoeh.antennapod.core.util.syndication;
|
package de.danoeh.antennapod.core.util.syndication;
|
||||||
|
|
||||||
|
import android.text.TextUtils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.helper.StringUtil;
|
import org.jsoup.helper.StringUtil;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.nodes.Node;
|
import org.jsoup.nodes.Node;
|
||||||
import org.jsoup.nodes.TextNode;
|
import org.jsoup.nodes.TextNode;
|
||||||
import org.jsoup.select.NodeTraversor;
|
import org.jsoup.select.NodeTraversor;
|
||||||
import org.jsoup.select.NodeVisitor;
|
import org.jsoup.select.NodeVisitor;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class is based on <code>HtmlToPlainText</code> from jsoup's examples package.
|
* This class is based on <code>HtmlToPlainText</code> from jsoup's examples package.
|
||||||
*
|
*
|
||||||
|
@ -26,6 +33,39 @@ import org.jsoup.select.NodeVisitor;
|
||||||
*/
|
*/
|
||||||
public class HtmlToPlainText {
|
public class HtmlToPlainText {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use this method to strip HTML markup from the given text
|
||||||
|
* <p>
|
||||||
|
* Replaces bullet points with *, ignores colors/bold/...
|
||||||
|
*
|
||||||
|
* @param str String that may or may not contain HTML markup
|
||||||
|
* @return Human readable text with minimal HTML formatting
|
||||||
|
*/
|
||||||
|
public static String getPlainText(String str) {
|
||||||
|
if (!TextUtils.isEmpty(str) && isHtml(str)) {
|
||||||
|
HtmlToPlainText formatter = new HtmlToPlainText();
|
||||||
|
Document feedDescription = Jsoup.parse(str);
|
||||||
|
str = StringUtils.trim(formatter.getPlainText(feedDescription));
|
||||||
|
} else if (TextUtils.isEmpty(str)) {
|
||||||
|
str = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use this method to determine if a given text has any HTML tag
|
||||||
|
*
|
||||||
|
* @param str String to be tested for presence of HTML content
|
||||||
|
* @return <b>True</b> if text contains any HTML tags<br><b>False</b> if no HTML tag is found
|
||||||
|
*/
|
||||||
|
private static boolean isHtml(String str) {
|
||||||
|
final String HTML_TAG_PATTERN = "<(\"[^\"]*\"|'[^']*'|[^'\">])*>";
|
||||||
|
Pattern htmlValidator = TextUtils.isEmpty(HTML_TAG_PATTERN) ? null : Pattern.compile(HTML_TAG_PATTERN);
|
||||||
|
|
||||||
|
return htmlValidator.matcher(str).find();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Format an Element to plain-text
|
* Format an Element to plain-text
|
||||||
* @param element the root element to format
|
* @param element the root element to format
|
||||||
|
|
Loading…
Reference in New Issue