Work around some publishers putting HTML everywhere

This commit is contained in:
ByteHamster 2021-11-16 21:23:52 +01:00
parent 51e44c68c8
commit dd2dd9e4b8
2 changed files with 29 additions and 60 deletions

View File

@ -49,66 +49,33 @@ public class Itunes extends Namespace {
return;
}
if (AUTHOR.equals(localName)) {
parseAuthor(state);
String content = state.getContentBuf().toString();
String contentFromHtml = HtmlCompat.fromHtml(content, HtmlCompat.FROM_HTML_MODE_COMPACT).toString();
if (TextUtils.isEmpty(content)) {
return;
}
if (AUTHOR.equals(localName) && state.getFeed() != null) {
state.getFeed().setAuthor(contentFromHtml);
} else if (DURATION.equals(localName)) {
parseDuration(state);
try {
long durationMs = DurationParser.inMillis(content);
state.getTempObjects().put(DURATION, (int) durationMs);
} catch (NumberFormatException e) {
Log.e(NSTAG, String.format("Duration '%s' could not be parsed", content));
}
} else if (SUBTITLE.equals(localName)) {
parseSubtitle(state);
if (state.getCurrentItem() != null && TextUtils.isEmpty(state.getCurrentItem().getDescription())) {
state.getCurrentItem().setDescriptionIfLonger(content);
} else if (state.getFeed() != null && TextUtils.isEmpty(state.getFeed().getDescription())) {
state.getFeed().setDescription(content);
}
} else if (SUMMARY.equals(localName)) {
SyndElement secondElement = state.getSecondTag();
parseSummary(state, secondElement.getName());
}
}
/**
 * Stores the text currently held in the state's content buffer as the feed author.
 * The text is passed through {@code HtmlCompat.fromHtml} (legacy mode) and converted
 * back to a {@code String} before being set. Does nothing when the state has no feed.
 *
 * @param state parser state supplying the content buffer and the feed to update
 */
private void parseAuthor(HandlerState state) {
if (state.getFeed() != null) {
String author = state.getContentBuf().toString();
// NOTE(review): presumably this drops HTML markup that some publishers put in the
// author field, leaving plain text - confirm against the HtmlCompat documentation.
state.getFeed().setAuthor(HtmlCompat.fromHtml(author,
HtmlCompat.FROM_HTML_MODE_LEGACY).toString());
}
}
/**
 * Parses the content buffer as a duration and stores the result in the state's
 * temp objects under the {@code DURATION} key. Empty content is ignored. Content
 * that makes the parser throw {@code NumberFormatException} is logged and nothing
 * is stored.
 *
 * @param state parser state supplying the content buffer and the temp-object map
 */
private void parseDuration(HandlerState state) {
String durationStr = state.getContentBuf().toString();
if (TextUtils.isEmpty(durationStr)) {
return;
}
try {
long durationMs = DurationParser.inMillis(durationStr);
// The long millisecond value is narrowed to int by the cast before being stored.
state.getTempObjects().put(DURATION, (int) durationMs);
} catch (NumberFormatException e) {
// Unparseable durations are only logged; the exception is not propagated.
Log.e(NSTAG, String.format("Duration '%s' could not be parsed", durationStr));
}
}
private void parseSubtitle(HandlerState state) {
String subtitle = state.getContentBuf().toString();
if (TextUtils.isEmpty(subtitle)) {
return;
}
if (state.getCurrentItem() != null) {
if (TextUtils.isEmpty(state.getCurrentItem().getDescription())) {
state.getCurrentItem().setDescriptionIfLonger(subtitle);
if (state.getCurrentItem() != null) {
state.getCurrentItem().setDescriptionIfLonger(content);
} else if (Rss20.CHANNEL.equals(state.getSecondTag().getName()) && state.getFeed() != null) {
state.getFeed().setDescription(content);
}
} else {
if (state.getFeed() != null && TextUtils.isEmpty(state.getFeed().getDescription())) {
state.getFeed().setDescription(subtitle);
}
}
}
/**
 * Uses the content buffer text as a description for the current item or, failing
 * that, for the feed. Empty content is ignored.
 *
 * @param state             parser state supplying the content buffer, current item and feed
 * @param secondElementName element name supplied by the caller; the feed description is
 *                          only set when it equals {@code Rss20.CHANNEL}
 */
private void parseSummary(HandlerState state, String secondElementName) {
String summary = state.getContentBuf().toString();
if (TextUtils.isEmpty(summary)) {
return;
}
if (state.getCurrentItem() != null) {
// An item is being parsed, so the summary is offered to that item.
// NOTE(review): presumably setDescriptionIfLonger keeps the existing description
// unless this one is longer - confirm in the item class.
state.getCurrentItem().setDescriptionIfLonger(summary);
} else if (Rss20.CHANNEL.equals(secondElementName) && state.getFeed() != null) {
// No current item: apply the summary to the feed, but only for the channel element.
state.getFeed().setDescription(summary);
}
}
}

View File

@ -3,6 +3,7 @@ package de.danoeh.antennapod.parser.feed.namespace;
import android.text.TextUtils;
import android.util.Log;
import androidx.core.text.HtmlCompat;
import de.danoeh.antennapod.parser.feed.HandlerState;
import de.danoeh.antennapod.parser.feed.element.SyndElement;
import de.danoeh.antennapod.parser.feed.util.DateUtils;
@ -97,6 +98,7 @@ public class Rss20 extends Namespace {
} else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) {
String contentRaw = state.getContentBuf().toString();
String content = SyndStringUtils.trimAllWhitespace(contentRaw);
String contentFromHtml = HtmlCompat.fromHtml(content, HtmlCompat.FROM_HTML_MODE_COMPACT).toString();
SyndElement topElement = state.getTagstack().peek();
String top = topElement.getName();
SyndElement secondElement = state.getSecondTag();
@ -113,9 +115,9 @@ public class Rss20 extends Namespace {
}
} else if (TITLE.equals(top)) {
if (ITEM.equals(second) && state.getCurrentItem() != null) {
state.getCurrentItem().setTitle(content);
state.getCurrentItem().setTitle(contentFromHtml);
} else if (CHANNEL.equals(second) && state.getFeed() != null) {
state.getFeed().setTitle(content);
state.getFeed().setTitle(contentFromHtml);
}
} else if (LINK.equals(top)) {
if (CHANNEL.equals(second) && state.getFeed() != null) {
@ -132,9 +134,9 @@ public class Rss20 extends Namespace {
}
} else if (DESCR.equals(localName)) {
if (CHANNEL.equals(second) && state.getFeed() != null) {
state.getFeed().setDescription(content);
state.getFeed().setDescription(contentFromHtml);
} else if (ITEM.equals(second) && state.getCurrentItem() != null) {
state.getCurrentItem().setDescriptionIfLonger(content);
state.getCurrentItem().setDescriptionIfLonger(contentFromHtml);
}
} else if (LANGUAGE.equals(localName) && state.getFeed() != null) {
state.getFeed().setLanguage(content.toLowerCase(Locale.US));