From dd2dd9e4b858d267d2ed75847dc4d14eb81c9b05 Mon Sep 17 00:00:00 2001 From: ByteHamster Date: Tue, 16 Nov 2021 21:23:52 +0100 Subject: [PATCH] Work around some publishers putting html everywhere --- .../parser/feed/namespace/Itunes.java | 79 ++++++------------- .../parser/feed/namespace/Rss20.java | 10 ++- 2 files changed, 29 insertions(+), 60 deletions(-) diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Itunes.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Itunes.java index 5f47f8377..63d8dd476 100644 --- a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Itunes.java +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Itunes.java @@ -49,66 +49,33 @@ public class Itunes extends Namespace { return; } - if (AUTHOR.equals(localName)) { - parseAuthor(state); + String content = state.getContentBuf().toString(); + String contentFromHtml = HtmlCompat.fromHtml(content, HtmlCompat.FROM_HTML_MODE_COMPACT).toString(); + if (TextUtils.isEmpty(content)) { + return; + } + + if (AUTHOR.equals(localName) && state.getFeed() != null) { + state.getFeed().setAuthor(contentFromHtml); } else if (DURATION.equals(localName)) { - parseDuration(state); + try { + long durationMs = DurationParser.inMillis(content); + state.getTempObjects().put(DURATION, (int) durationMs); + } catch (NumberFormatException e) { + Log.e(NSTAG, String.format("Duration '%s' could not be parsed", content)); + } } else if (SUBTITLE.equals(localName)) { - parseSubtitle(state); + if (state.getCurrentItem() != null && TextUtils.isEmpty(state.getCurrentItem().getDescription())) { + state.getCurrentItem().setDescriptionIfLonger(content); + } else if (state.getFeed() != null && TextUtils.isEmpty(state.getFeed().getDescription())) { + state.getFeed().setDescription(content); + } } else if (SUMMARY.equals(localName)) { - SyndElement secondElement = state.getSecondTag(); - parseSummary(state, secondElement.getName()); - } - } - - private void parseAuthor(HandlerState state) { - if (state.getFeed() != null) { - String author = state.getContentBuf().toString(); - state.getFeed().setAuthor(HtmlCompat.fromHtml(author, - HtmlCompat.FROM_HTML_MODE_LEGACY).toString()); - } - } - - private void parseDuration(HandlerState state) { - String durationStr = state.getContentBuf().toString(); - if (TextUtils.isEmpty(durationStr)) { - return; - } - - try { - long durationMs = DurationParser.inMillis(durationStr); - state.getTempObjects().put(DURATION, (int) durationMs); - } catch (NumberFormatException e) { - Log.e(NSTAG, String.format("Duration '%s' could not be parsed", durationStr)); - } - } - - private void parseSubtitle(HandlerState state) { - String subtitle = state.getContentBuf().toString(); - if (TextUtils.isEmpty(subtitle)) { - return; - } - if (state.getCurrentItem() != null) { - if (TextUtils.isEmpty(state.getCurrentItem().getDescription())) { - state.getCurrentItem().setDescriptionIfLonger(subtitle); + if (state.getCurrentItem() != null) { + state.getCurrentItem().setDescriptionIfLonger(content); + } else if (Rss20.CHANNEL.equals(state.getSecondTag().getName()) && state.getFeed() != null) { + state.getFeed().setDescription(content); } - } else { - if (state.getFeed() != null && TextUtils.isEmpty(state.getFeed().getDescription())) { - state.getFeed().setDescription(subtitle); - } - } - } - - private void parseSummary(HandlerState state, String secondElementName) { - String summary = state.getContentBuf().toString(); - if (TextUtils.isEmpty(summary)) { - return; - } - - if (state.getCurrentItem() != null) { - state.getCurrentItem().setDescriptionIfLonger(summary); - } else if (Rss20.CHANNEL.equals(secondElementName) && state.getFeed() != null) { - state.getFeed().setDescription(summary); } } } diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java index 9ac77a5e6..a39e1b5b7 100644 --- a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java @@ -3,6 +3,7 @@ package de.danoeh.antennapod.parser.feed.namespace; import android.text.TextUtils; import android.util.Log; +import androidx.core.text.HtmlCompat; import de.danoeh.antennapod.parser.feed.HandlerState; import de.danoeh.antennapod.parser.feed.element.SyndElement; import de.danoeh.antennapod.parser.feed.util.DateUtils; @@ -97,6 +98,7 @@ public class Rss20 extends Namespace { } else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) { String contentRaw = state.getContentBuf().toString(); String content = SyndStringUtils.trimAllWhitespace(contentRaw); + String contentFromHtml = HtmlCompat.fromHtml(content, HtmlCompat.FROM_HTML_MODE_COMPACT).toString(); SyndElement topElement = state.getTagstack().peek(); String top = topElement.getName(); SyndElement secondElement = state.getSecondTag(); @@ -113,9 +115,9 @@ public class Rss20 extends Namespace { } } else if (TITLE.equals(top)) { if (ITEM.equals(second) && state.getCurrentItem() != null) { - state.getCurrentItem().setTitle(content); + state.getCurrentItem().setTitle(contentFromHtml); } else if (CHANNEL.equals(second) && state.getFeed() != null) { - state.getFeed().setTitle(content); + state.getFeed().setTitle(contentFromHtml); } } else if (LINK.equals(top)) { if (CHANNEL.equals(second) && state.getFeed() != null) { @@ -132,9 +134,9 @@ public class Rss20 extends Namespace { } } else if (DESCR.equals(localName)) { if (CHANNEL.equals(second) && state.getFeed() != null) { - state.getFeed().setDescription(content); + state.getFeed().setDescription(contentFromHtml); } else if (ITEM.equals(second) && state.getCurrentItem() != null) { - state.getCurrentItem().setDescriptionIfLonger(content); + state.getCurrentItem().setDescriptionIfLonger(contentFromHtml); } } else if (LANGUAGE.equals(localName) && state.getFeed() != null) { state.getFeed().setLanguage(content.toLowerCase(Locale.US));