Merge pull request #2138 from mfietz/2126-atom-html

Sanitize HTML from Atom feed descriptions/subtitles
This commit is contained in:
Martin Fietz 2016-11-01 20:49:16 +01:00 committed by GitHub
commit 3322f11612
4 changed files with 174 additions and 76 deletions

View File

@ -28,6 +28,10 @@ import android.widget.Toast;
import com.bumptech.glide.Glide;
import com.joanzapata.iconify.Iconify;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import de.danoeh.antennapod.R;
import de.danoeh.antennapod.core.dialog.ConfirmationDialog;
import de.danoeh.antennapod.core.dialog.DownloadRequestErrorDialogCreator;
@ -41,6 +45,7 @@ import de.danoeh.antennapod.core.storage.DBWriter;
import de.danoeh.antennapod.core.storage.DownloadRequestException;
import de.danoeh.antennapod.core.util.IntentUtils;
import de.danoeh.antennapod.core.util.LangUtils;
import de.danoeh.antennapod.core.util.syndication.HtmlToPlainText;
import de.danoeh.antennapod.menuhandler.FeedMenuHandler;
import rx.Observable;
import rx.Subscription;
@ -51,11 +56,10 @@ import rx.schedulers.Schedulers;
* Displays information about a feed.
*/
public class FeedInfoActivity extends AppCompatActivity {
private static final String TAG = "FeedInfoActivity";
private boolean autoDeleteChanged = false;
public static final String EXTRA_FEED_ID = "de.danoeh.antennapod.extra.feedId";
private static final String TAG = "FeedInfoActivity";
private boolean autoDeleteChanged = false;
private Feed feed;
private ImageView imgvCover;
@ -78,6 +82,7 @@ public class FeedInfoActivity extends AppCompatActivity {
private Subscription subscription;
private final View.OnClickListener copyUrlToClipboard = new View.OnClickListener() {
@Override
public void onClick(View v) {
@ -99,6 +104,40 @@ public class FeedInfoActivity extends AppCompatActivity {
}
};
private boolean authInfoChanged = false;
/**
 * Text watcher whose only effect is to set {@code authInfoChanged} to true after the watched text changed.
 * NOTE(review): the views this watcher is attached to are not visible in this hunk.
 */
private TextWatcher authTextWatcher = new TextWatcher() {
@Override
public void beforeTextChanged(CharSequence s, int start, int count, int after) {
// no-op
}
@Override
public void onTextChanged(CharSequence s, int start, int before, int count) {
// no-op
}
@Override
public void afterTextChanged(Editable s) {
// remember that an edit happened; the new text itself is not inspected here
authInfoChanged = true;
}
};
private boolean filterTextChanged = false;
/**
 * Text watcher whose only effect is to set {@code filterTextChanged} to true after the watched text changed.
 * NOTE(review): the views this watcher is attached to are not visible in this hunk.
 */
private TextWatcher filterTextWatcher = new TextWatcher() {
@Override
public void beforeTextChanged(CharSequence s, int start, int count, int after) {
// no-op
}
@Override
public void onTextChanged(CharSequence s, int start, int before, int count) {
// no-op
}
@Override
public void afterTextChanged(Editable s) {
// remember that an edit happened; the new text itself is not inspected here
filterTextChanged = true;
}
};
@Override
protected void onCreate(Bundle savedInstanceState) {
setTheme(UserPreferences.getTheme());
@ -157,8 +196,19 @@ public class FeedInfoActivity extends AppCompatActivity {
.into(imgvCover);
txtvTitle.setText(feed.getTitle());
String description = feed.getDescription();
txtvDescription.setText((description != null) ? description.trim() : "");
if(description != null) {
if(Feed.TYPE_ATOM1.equals(feed.getType())) {
HtmlToPlainText formatter = new HtmlToPlainText();
Document feedDescription = Jsoup.parse(feed.getDescription());
description = StringUtils.trim(formatter.getPlainText(feedDescription));
}
} else {
description = "";
}
txtvDescription.setText(description);
if (!TextUtils.isEmpty(feed.getAuthor())) {
txtvAuthor.setText(feed.getAuthor());
} else {
@ -250,53 +300,6 @@ public class FeedInfoActivity extends AppCompatActivity {
});
}
@Override
public void onDestroy() {
super.onDestroy();
if(subscription != null) {
subscription.unsubscribe();
}
}
private boolean authInfoChanged = false;
private TextWatcher authTextWatcher = new TextWatcher() {
@Override
public void beforeTextChanged(CharSequence s, int start, int count, int after) {
}
@Override
public void onTextChanged(CharSequence s, int start, int before, int count) {
}
@Override
public void afterTextChanged(Editable s) {
authInfoChanged = true;
}
};
private boolean filterTextChanged = false;
private TextWatcher filterTextWatcher = new TextWatcher() {
@Override
public void beforeTextChanged(CharSequence s, int start, int count, int after) {
}
@Override
public void onTextChanged(CharSequence s, int start, int before, int count) {
}
@Override
public void afterTextChanged(Editable s) {
filterTextChanged = true;
}
};
@Override
protected void onPause() {
super.onPause();
@ -328,6 +331,14 @@ public class FeedInfoActivity extends AppCompatActivity {
}
}
/**
 * Delegates to the superclass and then unsubscribes {@code subscription} if one has been set.
 */
@Override
public void onDestroy() {
super.onDestroy();
// subscription may never have been assigned, hence the null check
if(subscription != null) {
subscription.unsubscribe();
}
}
@Override
public boolean onCreateOptionsMenu(Menu menu) {
super.onCreateOptionsMenu(menu);
@ -379,7 +390,7 @@ public class FeedInfoActivity extends AppCompatActivity {
private final Feed feed;
private final boolean autoDownload;
public ApplyToEpisodesDialog(Context context, Feed feed, boolean autoDownload) {
ApplyToEpisodesDialog(Context context, Feed feed, boolean autoDownload) {
super(context, R.string.auto_download_apply_to_items_title,
R.string.auto_download_apply_to_items_message);
this.feed = feed;

View File

@ -29,7 +29,6 @@ import com.bumptech.glide.Glide;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.examples.HtmlToPlainText;
import org.jsoup.nodes.Document;
import java.io.File;
@ -63,6 +62,7 @@ import de.danoeh.antennapod.core.util.FileNameGenerator;
import de.danoeh.antennapod.core.util.StorageUtils;
import de.danoeh.antennapod.core.util.URLChecker;
import de.danoeh.antennapod.core.util.syndication.FeedDiscoverer;
import de.danoeh.antennapod.core.util.syndication.HtmlToPlainText;
import de.danoeh.antennapod.dialog.AuthenticationDialog;
import de.greenrobot.event.EventBus;
import rx.Observable;
@ -81,17 +81,12 @@ import rx.schedulers.Schedulers;
*/
public class OnlineFeedViewActivity extends AppCompatActivity {
private static final String TAG = "OnlineFeedViewActivity";
public static final String ARG_FEEDURL = "arg.feedurl";
// Optional argument: specify a title for the actionbar.
public static final String ARG_TITLE = "title";
private static final int EVENTS = EventDistributor.FEED_LIST_UPDATE;
public static final int RESULT_ERROR = 2;
private static final String TAG = "OnlineFeedViewActivity";
private static final int EVENTS = EventDistributor.FEED_LIST_UPDATE;
private volatile List<Feed> feeds;
private Feed feed;
private String selectedDownloadUrl;
@ -106,17 +101,11 @@ public class OnlineFeedViewActivity extends AppCompatActivity {
private Subscription download;
private Subscription parser;
private Subscription updater;
public void onEventMainThread(DownloadEvent event) {
Log.d(TAG, "onEventMainThread() called with: " + "event = [" + event + "]");
setSubscribeButtonState(feed);
}
private EventDistributor.EventListener listener = new EventDistributor.EventListener() {
@Override
public void update(EventDistributor eventDistributor, Integer arg) {
if ((arg & EventDistributor.FEED_LIST_UPDATE) != 0) {
updater = Observable.fromCallable(() -> DBReader.getFeedList())
updater = Observable.fromCallable(DBReader::getFeedList)
.subscribeOn(Schedulers.newThread())
.observeOn(AndroidSchedulers.mainThread())
.subscribe(
@ -133,6 +122,11 @@ public class OnlineFeedViewActivity extends AppCompatActivity {
}
};
/**
 * Logs the received {@code DownloadEvent} and calls {@code setSubscribeButtonState} with the current feed.
 * NOTE(review): presumably invoked by EventBus on the main thread (method naming convention) -- confirm.
 *
 * @param event the download event that was posted
 */
public void onEventMainThread(DownloadEvent event) {
Log.d(TAG, "onEventMainThread() called with: " + "event = [" + event + "]");
setSubscribeButtonState(feed);
}
@Override
protected void onCreate(Bundle savedInstanceState) {
setTheme(UserPreferences.getTheme());
@ -284,7 +278,7 @@ public class OnlineFeedViewActivity extends AppCompatActivity {
})
.subscribeOn(Schedulers.newThread())
.observeOn(AndroidSchedulers.mainThread())
.subscribe(status -> checkDownloadResult(status),
.subscribe(this::checkDownloadResult,
error -> Log.e(TAG, Log.getStackTraceString(error)));
}
@ -360,14 +354,19 @@ public class OnlineFeedViewActivity extends AppCompatActivity {
* This method is executed on a background thread
*/
private void beforeShowFeedInformation(Feed feed) {
// remove HTML tags from descriptions
final HtmlToPlainText formatter = new HtmlToPlainText();
if(Feed.TYPE_ATOM1.equals(feed.getType())) {
// remove HTML tags from descriptions
Log.d(TAG, "Removing HTML from feed description");
Document feedDescription = Jsoup.parse(feed.getDescription());
feed.setDescription(StringUtils.trim(formatter.getPlainText(feedDescription)));
}
Log.d(TAG, "Removing HTML from shownotes");
if (feed.getItems() != null) {
HtmlToPlainText formatter = new HtmlToPlainText();
for (FeedItem item : feed.getItems()) {
if (item.getDescription() != null) {
Document description = Jsoup.parse(item.getDescription());
item.setDescription(StringUtils.trim(formatter.getPlainText(description)));
Document itemDescription = Jsoup.parse(item.getDescription());
item.setDescription(StringUtils.trim(formatter.getPlainText(itemDescription)));
}
}
}
@ -589,7 +588,7 @@ public class OnlineFeedViewActivity extends AppCompatActivity {
private String feedUrl;
public FeedViewAuthenticationDialog(Context context, int titleRes, String feedUrl) {
FeedViewAuthenticationDialog(Context context, int titleRes, String feedUrl) {
super(context, titleRes, true, false, null, null);
this.feedUrl = feedUrl;
}

View File

@ -15,7 +15,7 @@ import de.danoeh.antennapod.core.util.NetworkUtils;
/**
* Utility methods for adapters
*/
public class AdapterUtils {
class AdapterUtils {
private static final String TAG = AdapterUtils.class.getSimpleName();
@ -26,7 +26,7 @@ public class AdapterUtils {
/**
* Updates the contents of the TextView that shows the current playback position and the ProgressBar.
*/
public static void updateEpisodePlaybackProgress(FeedItem item, TextView txtvPos, ProgressBar episodeProgress) {
static void updateEpisodePlaybackProgress(FeedItem item, TextView txtvPos, ProgressBar episodeProgress) {
FeedMedia media = item.getMedia();
episodeProgress.setVisibility(View.GONE);
if (media == null) {
@ -47,7 +47,6 @@ public class AdapterUtils {
- media.getPosition()));
}
} else if (!media.isDownloaded()) {
Log.d(TAG, "size: " + media.getSize());
if (media.getSize() > 0) {
txtvPos.setText(Converter.byteToString(media.getSize()));
} else if(NetworkUtils.isDownloadAllowed() && !media.checkedOnSizeButUnknown()) {

View File

@ -0,0 +1,89 @@
package de.danoeh.antennapod.core.util.syndication;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
/**
* This class is based on <code>HtmlToPlainText</code> from jsoup's examples package.
*
* HTML to plain-text. This example program demonstrates the use of jsoup to convert HTML input to lightly-formatted
* plain-text. That is divergent from the general goal of jsoup's .text() methods, which is to get clean data from a
* scrape.
* <p>
* Note that this is a fairly simplistic formatter -- for real world use you'll want to embrace and extend.
* </p>
* <p>
* To invoke from the command line, assuming you've downloaded the jsoup jar to your current directory:</p>
* <p><code>java -cp jsoup.jar org.jsoup.examples.HtmlToPlainText url [selector]</code></p>
* where <i>url</i> is the URL to fetch, and <i>selector</i> is an optional CSS selector.
*
* @author Jonathan Hedley, jonathan@hedley.net
* @author AntennaPod open source community
*/
public class HtmlToPlainText {
/**
* Format an Element to plain-text
* @param element the root element to format
* @return formatted text
*/
public String getPlainText(Element element) {
FormattingVisitor formatter = new FormattingVisitor();
NodeTraversor traversor = new NodeTraversor(formatter);
traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node
return formatter.toString();
}
// the formatting rules, implemented in a breadth-first DOM traverse
private class FormattingVisitor implements NodeVisitor {
private StringBuilder accum = new StringBuilder(); // holds the accumulated text
// hit when the node is first seen
public void head(Node node, int depth) {
String name = node.nodeName();
if (node instanceof TextNode) {
append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM.
}
else if (name.equals("li")) {
append("\n * ");
}
else if (name.equals("dt")) {
append(" ");
}
else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) {
append("\n");
}
}
// hit when all of the node's children (if any) have been visited
public void tail(Node node, int depth) {
String name = node.nodeName();
if (StringUtil.in(name, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5")) {
append("\n");
} else if (name.equals("a")) {
append(String.format(" <%s>", node.absUrl("href")));
}
}
// appends text to the string builder with a simple word wrap method
private void append(String text) {
if (text.equals(" ") &&
(accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n"))) {
return; // don't accumulate long runs of empty spaces
}
accum.append(text);
}
@Override
public String toString() {
return accum.toString();
}
}
}