Make feed parsing as safe as possible

This commit is contained in:
Martin Fietz 2016-03-08 09:46:58 +01:00
parent 2c4aaea708
commit 5e7b328d83
6 changed files with 137 additions and 106 deletions

View File

@ -1,8 +1,9 @@
package de.danoeh.antennapod.core.syndication.namespace;
import de.danoeh.antennapod.core.syndication.handler.HandlerState;
import org.xml.sax.Attributes;
import de.danoeh.antennapod.core.syndication.handler.HandlerState;
public class NSContent extends Namespace {
public static final String NSTAG = "content";
public static final String NSURI = "http://purl.org/rss/1.0/modules/content/";
@ -17,7 +18,8 @@ public class NSContent extends Namespace {
@Override
public void handleElementEnd(String localName, HandlerState state) {
if (localName.equals(ENCODED)) {
if (ENCODED.equals(localName) && state.getCurrentItem() != null &&
state.getContentBuf() != null) {
state.getCurrentItem().setContentEncoded(state.getContentBuf().toString());
}
}

View File

@ -2,6 +2,9 @@ package de.danoeh.antennapod.core.syndication.namespace;
import org.xml.sax.Attributes;
import java.util.Stack;
import de.danoeh.antennapod.core.feed.FeedItem;
import de.danoeh.antennapod.core.syndication.handler.HandlerState;
import de.danoeh.antennapod.core.util.DateUtils;
@ -21,16 +24,18 @@ public class NSDublinCore extends Namespace {
@Override
public void handleElementEnd(String localName, HandlerState state) {
if(state.getTagstack().size() >= 2
&& state.getContentBuf() != null) {
String content = state.getContentBuf().toString();
SyndElement topElement = state.getTagstack().peek();
String top = topElement.getName();
SyndElement secondElement = state.getSecondTag();
String second = secondElement.getName();
if (top.equals(DATE) && second.equals(ITEM)) {
state.getCurrentItem().setPubDate(
DateUtils.parse(content));
if (state.getCurrentItem() != null && state.getTagstack().size() >= 2 &&
state.getContentBuf() != null) {
FeedItem currentItem = state.getCurrentItem();
Stack<SyndElement> tagStack = state.getTagstack();
if(tagStack.size() < 2) {
return;
}
String top = tagStack.peek().getName();
String second = state.getSecondTag().getName();
if (DATE.equals(top) && ITEM.equals(second)) {
String content = state.getContentBuf().toString();
currentItem.setPubDate(DateUtils.parse(content));
}
}
}

View File

@ -1,6 +1,7 @@
package de.danoeh.antennapod.core.syndication.namespace;
import android.text.TextUtils;
import android.util.Log;
import org.xml.sax.Attributes;
@ -10,6 +11,7 @@ import de.danoeh.antennapod.core.feed.FeedImage;
import de.danoeh.antennapod.core.syndication.handler.HandlerState;
public class NSITunes extends Namespace {
public static final String NSTAG = "itunes";
public static final String NSURI = "http://www.itunes.com/dtds/podcast-1.0.dtd";
@ -26,69 +28,91 @@ public class NSITunes extends Namespace {
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
Attributes attributes) {
if (localName.equals(IMAGE)) {
if (IMAGE.equals(localName)) {
FeedImage image = new FeedImage();
image.setTitle(IMAGE_TITLE);
image.setDownload_url(attributes.getValue(IMAGE_HREF));
if (state.getCurrentItem() != null) {
// this is an items image
image.setTitle(state.getCurrentItem().getTitle()+IMAGE_TITLE);
image.setTitle(state.getCurrentItem().getTitle() + IMAGE_TITLE);
image.setOwner(state.getCurrentItem());
state.getCurrentItem().setImage(image);
} else {
} else {
// this is the feed image
// prefer to all other images
if(!TextUtils.isEmpty(image.getDownload_url())) {
if (!TextUtils.isEmpty(image.getDownload_url())) {
image.setOwner(state.getFeed());
state.getFeed().setImage(image);
}
}
}
return new SyndElement(localName, this);
}
@Override
public void handleElementEnd(String localName, HandlerState state) {
if (localName.equals(AUTHOR)) {
state.getFeed().setAuthor(state.getContentBuf().toString());
} else if (localName.equals(DURATION)) {
String[] parts = state.getContentBuf().toString().trim().split(":");
if (AUTHOR.equals(localName)) {
if (state.getContentBuf() != null && state.getFeed() != null) {
String author = state.getContentBuf().toString();
if(TextUtils.isEmpty(author)) {
return;
}
state.getFeed().setAuthor(author);
}
} else if (DURATION.equals(localName)) {
if (state.getContentBuf() == null) {
return;
}
String duration = state.getContentBuf().toString();
if(TextUtils.isEmpty(duration)) {
return;
}
String[] parts = duration.trim().split(":");
try {
int duration = 0;
int durationMs = 0;
if (parts.length == 2) {
duration += TimeUnit.MINUTES.toMillis(Long.parseLong(parts[0])) +
durationMs += TimeUnit.MINUTES.toMillis(Long.parseLong(parts[0])) +
TimeUnit.SECONDS.toMillis(Long.parseLong(parts[1]));
} else if (parts.length >= 3) {
duration += TimeUnit.HOURS.toMillis(Long.parseLong(parts[0])) +
durationMs += TimeUnit.HOURS.toMillis(Long.parseLong(parts[0])) +
TimeUnit.MINUTES.toMillis(Long.parseLong(parts[1])) +
TimeUnit.SECONDS.toMillis(Long.parseLong(parts[2]));
} else {
return;
}
state.getTempObjects().put(DURATION, duration);
state.getTempObjects().put(DURATION, durationMs);
} catch (NumberFormatException e) {
e.printStackTrace();
Log.e(NSTAG, Log.getStackTraceString(e));
}
} else if (SUBTITLE.equals(localName)) {
if (state.getContentBuf() == null) {
return;
}
} else if (localName.equals(SUBTITLE)) {
String subtitle = state.getContentBuf().toString();
if (TextUtils.isEmpty(subtitle)) {
return;
}
if (state.getCurrentItem() != null) {
if (TextUtils.isEmpty(state.getCurrentItem().getDescription())) {
state.getCurrentItem().setDescription(subtitle);
}
} else {
if (TextUtils.isEmpty(state.getFeed().getDescription())) {
if (state.getFeed() != null && TextUtils.isEmpty(state.getFeed().getDescription())) {
state.getFeed().setDescription(subtitle);
}
}
} else if (localName.equals(SUMMARY)) {
} else if (SUMMARY.equals(localName)) {
if (state.getContentBuf() == null) {
return;
}
String summary = state.getContentBuf().toString();
if (TextUtils.isEmpty(summary)) {
return;
}
if (state.getCurrentItem() != null) {
state.getCurrentItem().setDescription(summary);
} else {
} else if (state.getFeed() != null) {
state.getFeed().setDescription(summary);
}
}

View File

@ -1,14 +1,15 @@
package de.danoeh.antennapod.core.syndication.namespace;
import android.util.Log;
import de.danoeh.antennapod.core.BuildConfig;
import de.danoeh.antennapod.core.feed.FeedMedia;
import de.danoeh.antennapod.core.syndication.handler.HandlerState;
import de.danoeh.antennapod.core.syndication.util.SyndTypeUtils;
import org.xml.sax.Attributes;
import java.util.concurrent.TimeUnit;
import de.danoeh.antennapod.core.feed.FeedMedia;
import de.danoeh.antennapod.core.syndication.handler.HandlerState;
import de.danoeh.antennapod.core.syndication.util.SyndTypeUtils;
/** Processes tags from the http://search.yahoo.com/mrss/ namespace. */
public class NSMedia extends Namespace {
private static final String TAG = "NSMedia";
@ -25,10 +26,10 @@ public class NSMedia extends Namespace {
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
Attributes attributes) {
if (localName.equals(CONTENT)) {
if (CONTENT.equals(localName)) {
String url = attributes.getValue(DOWNLOAD_URL);
String type = attributes.getValue(MIME_TYPE);
if (state.getCurrentItem().getMedia() == null
if (state.getCurrentItem() != null && state.getCurrentItem().getMedia() == null
&& url != null
&& (SyndTypeUtils.enclosureTypeValid(type) || ((type = SyndTypeUtils
.getValidMimeTypeFromUrl(url)) != null))) {
@ -37,24 +38,24 @@ public class NSMedia extends Namespace {
try {
size = Long.parseLong(attributes.getValue(SIZE));
} catch (NumberFormatException e) {
if (BuildConfig.DEBUG)
Log.d(TAG, "Length attribute could not be parsed.");
Log.e(TAG, "Length attribute could not be parsed.");
}
int duration = 0;
try {
String durationStr = attributes.getValue(DURATION);
if (durationStr != null) {
String durationStr = attributes.getValue(DURATION);
if (durationStr != null) {
try {
duration = (int) TimeUnit.MILLISECONDS.convert(
Long.parseLong(durationStr), TimeUnit.SECONDS);
Long.parseLong(durationStr), TimeUnit.SECONDS);
} catch (NumberFormatException e) {
Log.e(TAG, "Duration attribute could not be parsed");
}
} catch (NumberFormatException e) {
if (BuildConfig.DEBUG)
Log.d(TAG, "Duration attribute could not be parsed");
}
state.getCurrentItem().setMedia(
new FeedMedia(state.getCurrentItem(), url, size, type));
FeedMedia media = new FeedMedia(state.getCurrentItem(), url, size, type);
if(duration > 0) {
media.setDuration(duration);
}
state.getCurrentItem().setMedia(media);
}
}
return new SyndElement(localName, this);

View File

@ -1,10 +1,10 @@
package de.danoeh.antennapod.core.syndication.namespace;
import android.text.TextUtils;
import android.util.Log;
import org.xml.sax.Attributes;
import de.danoeh.antennapod.core.BuildConfig;
import de.danoeh.antennapod.core.feed.Feed;
import de.danoeh.antennapod.core.feed.FeedImage;
import de.danoeh.antennapod.core.feed.FeedItem;
@ -43,16 +43,16 @@ public class NSRSS20 extends Namespace {
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
Attributes attributes) {
if (localName.equals(ITEM)) {
if (ITEM.equals(localName)) {
state.setCurrentItem(new FeedItem());
state.getItems().add(state.getCurrentItem());
state.getCurrentItem().setFeed(state.getFeed());
} else if (localName.equals(ENCLOSURE)) {
} else if (ENCLOSURE.equals(localName)) {
String type = attributes.getValue(ENC_TYPE);
String url = attributes.getValue(ENC_URL);
if (state.getCurrentItem().getMedia() == null
&& (SyndTypeUtils.enclosureTypeValid(type) || ((type = SyndTypeUtils
if (state.getCurrentItem() != null && state.getCurrentItem().getMedia() == null &&
(SyndTypeUtils.enclosureTypeValid(type) || ((type = SyndTypeUtils
.getValidMimeTypeFromUrl(url)) != null))) {
long size = 0;
@ -63,19 +63,18 @@ public class NSRSS20 extends Namespace {
size = 0;
}
} catch (NumberFormatException e) {
if (BuildConfig.DEBUG)
Log.d(TAG, "Length attribute could not be parsed.");
Log.d(TAG, "Length attribute could not be parsed.");
}
state.getCurrentItem().setMedia(
new FeedMedia(state.getCurrentItem(), url, size, type));
}
} else if (localName.equals(IMAGE)) {
} else if (IMAGE.equals(localName)) {
if (state.getTagstack().size() >= 1) {
String parent = state.getTagstack().peek().getName();
if (parent.equals(CHANNEL)) {
if (CHANNEL.equals(parent)) {
Feed feed = state.getFeed();
if(feed.getImage() == null) {
if(feed != null && feed.getImage() == null) {
feed.setImage(new FeedImage());
feed.getImage().setOwner(state.getFeed());
}
@ -87,26 +86,26 @@ public class NSRSS20 extends Namespace {
@Override
public void handleElementEnd(String localName, HandlerState state) {
if (localName.equals(ITEM)) {
if (ITEM.equals(localName)) {
if (state.getCurrentItem() != null) {
FeedItem currentItem = state.getCurrentItem();
// the title tag is optional in RSS 2.0. The description is used
// as a
// title if the item has no title-tag.
if (state.getCurrentItem().getTitle() == null) {
state.getCurrentItem().setTitle(
state.getCurrentItem().getDescription());
if (currentItem.getTitle() == null) {
currentItem.setTitle(currentItem.getDescription());
}
if (state.getTempObjects().containsKey(NSITunes.DURATION)) {
if (state.getCurrentItem().hasMedia()) {
state.getCurrentItem().getMedia().setDuration((Integer) state.getTempObjects().get(NSITunes.DURATION));
if (currentItem.hasMedia()) {
Integer duration = (Integer) state.getTempObjects().get(NSITunes.DURATION);
currentItem.getMedia().setDuration(duration);
}
state.getTempObjects().remove(NSITunes.DURATION);
}
}
state.setCurrentItem(null);
} else if (state.getTagstack().size() >= 2
&& state.getContentBuf() != null) {
} else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) {
String content = state.getContentBuf().toString();
SyndElement topElement = state.getTagstack().peek();
String top = topElement.getName();
@ -117,45 +116,45 @@ public class NSRSS20 extends Namespace {
third = state.getThirdTag().getName();
}
if (top.equals(GUID) && second.equals(ITEM)) {
if (GUID.equals(top) && ITEM.equals(second)) {
// some feed creators include an empty or non-standard guid-element in their feed, which should be ignored
if (!content.isEmpty()) {
if (!TextUtils.isEmpty(content) && state.getCurrentItem() != null) {
state.getCurrentItem().setItemIdentifier(content);
}
} else if (top.equals(TITLE)) {
} else if (TITLE.equals(top)) {
String title = content.trim();
if (second.equals(ITEM)) {
if (ITEM.equals(second) && state.getCurrentItem() != null) {
state.getCurrentItem().setTitle(title);
} else if (second.equals(CHANNEL)) {
} else if (CHANNEL.equals(second) && state.getFeed() != null) {
state.getFeed().setTitle(title);
} else if (second.equals(IMAGE) && third != null
&& third.equals(CHANNEL)) {
if(state.getFeed().getImage().getTitle() == null) {
} else if (IMAGE.equals(second) && third != null && CHANNEL.equals(third)) {
if(state.getFeed() != null && state.getFeed().getImage() != null &&
state.getFeed().getImage().getTitle() == null) {
state.getFeed().getImage().setTitle(title);
}
}
} else if (top.equals(LINK)) {
if (second.equals(CHANNEL)) {
} else if (LINK.equals(top)) {
if (CHANNEL.equals(second) && state.getFeed() != null) {
state.getFeed().setLink(content);
} else if (second.equals(ITEM)) {
} else if (ITEM.equals(second) && state.getCurrentItem() != null) {
state.getCurrentItem().setLink(content);
}
} else if (top.equals(PUBDATE) && second.equals(ITEM)) {
state.getCurrentItem().setPubDate(
DateUtils.parse(content));
} else if (top.equals(URL) && second.equals(IMAGE) && third != null
&& third.equals(CHANNEL)) {
if(state.getFeed().getImage().getDownload_url() == null) { // prefer itunes:image
} else if (PUBDATE.equals(top) && ITEM.equals(second) && state.getCurrentItem() != null) {
state.getCurrentItem().setPubDate(DateUtils.parse(content));
} else if (URL.equals(top) && IMAGE.equals(second) && third != null
&& CHANNEL.equals(third)) {
// prefer itunes:image
if(state.getFeed() != null && state.getFeed().getImage() != null &&
state.getFeed().getImage().getDownload_url() == null) {
state.getFeed().getImage().setDownload_url(content);
}
} else if (localName.equals(DESCR)) {
if (second.equals(CHANNEL)) {
} else if (DESCR.equals(localName)) {
if (CHANNEL.equals(second) && state.getFeed() != null) {
state.getFeed().setDescription(content);
} else if (second.equals(ITEM)) {
} else if (ITEM.equals(second) && state.getCurrentItem() != null) {
state.getCurrentItem().setDescription(content);
}
} else if (localName.equals(LANGUAGE)) {
} else if (LANGUAGE.equals(localName) && state.getFeed() != null) {
state.getFeed().setLanguage(content.toLowerCase());
}
}

View File

@ -6,8 +6,7 @@ import org.xml.sax.Attributes;
import java.util.ArrayList;
import de.danoeh.antennapod.core.BuildConfig;
import de.danoeh.antennapod.core.feed.Chapter;
import de.danoeh.antennapod.core.feed.FeedItem;
import de.danoeh.antennapod.core.feed.SimpleChapter;
import de.danoeh.antennapod.core.syndication.handler.HandlerState;
import de.danoeh.antennapod.core.util.DateUtils;
@ -27,21 +26,22 @@ public class NSSimpleChapters extends Namespace {
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
Attributes attributes) {
if (localName.equals(CHAPTERS)) {
state.getCurrentItem().setChapters(new ArrayList<Chapter>());
} else if (localName.equals(CHAPTER)) {
try {
state.getCurrentItem()
.getChapters()
.add(new SimpleChapter(DateUtils
.parseTimeString(attributes.getValue(START)),
attributes.getValue(TITLE), state.getCurrentItem(),
attributes.getValue(HREF)));
} catch (NumberFormatException e) {
if (BuildConfig.DEBUG) Log.w(TAG, "Unable to read chapter", e);
FeedItem currentItem = state.getCurrentItem();
if(currentItem != null) {
if (localName.equals(CHAPTERS)) {
currentItem.setChapters(new ArrayList<>());
} else if (localName.equals(CHAPTER)) {
try {
long start = DateUtils.parseTimeString(attributes.getValue(START));
String title = attributes.getValue(TITLE);
String link = attributes.getValue(HREF);
SimpleChapter chapter = new SimpleChapter(start, title, currentItem, link);
currentItem.getChapters().add(chapter);
} catch (NumberFormatException e) {
Log.e(TAG, "Unable to read chapter", e);
}
}
}
return new SyndElement(localName, this);
}