Switched to a StringBuffer-based approach for collecting element character data in the feed parser

This commit is contained in:
daniel oeh 2012-06-24 17:08:16 +02:00
parent e128e6fd6e
commit e3115f1ae4
5 changed files with 92 additions and 88 deletions

View File

@ -11,6 +11,7 @@ import de.podfetcher.feed.FeedItem;
/** Contains all relevant information to describe the current state of a SyndHandler.*/
public class HandlerState {
/** Feed that the Handler is currently processing. */
protected Feed feed;
protected FeedItem currentItem;
@ -18,6 +19,8 @@ public class HandlerState {
/** Namespaces that have been defined so far. */
protected HashMap<String, Namespace> namespaces;
protected Stack<Namespace> defaultNamespaces;
/** Buffer for saving characters. */
protected StringBuffer contentBuf;
public HandlerState(Feed feed) {
this.feed = feed;
@ -55,6 +58,11 @@ public class HandlerState {
return second;
}
/**
 * Returns the buffer in which character data is collected.
 *
 * @return the current content buffer; other code in this change checks the
 *         buffer for {@code null} before using it, so it may be unset
 */
public StringBuffer getContentBuf() {
return contentBuf;
}

View File

@ -20,6 +20,7 @@ public class SyndHandler extends DefaultHandler {
private static final String DEFAULT_PREFIX = "";
protected HandlerState state;
public SyndHandler(Feed feed, TypeGetter.Type type) {
state = new HandlerState(feed);
if (type == TypeGetter.Type.RSS20) {
@ -30,6 +31,7 @@ public class SyndHandler extends DefaultHandler {
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
state.contentBuf = new StringBuffer();
Namespace handler = getHandlingNamespace(uri);
if (handler != null) {
SyndElement element = handler.handleElementStart(localName, state,
@ -43,6 +45,12 @@ public class SyndHandler extends DefaultHandler {
public void characters(char[] ch, int start, int length)
throws SAXException {
if (!state.tagstack.empty()) {
if (state.getTagstack().size() >= 2) {
if (state.contentBuf != null) {
String content = new String(ch, start, length);
state.contentBuf.append(content);
}
}
SyndElement top = state.tagstack.peek();
if (top.getNamespace() != null) {
top.getNamespace().handleCharacters(state, ch, start, length);
@ -59,6 +67,7 @@ public class SyndHandler extends DefaultHandler {
state.tagstack.pop();
}
state.contentBuf = null;
}

View File

@ -25,7 +25,7 @@ public class NSAtom extends Namespace {
private static final String IMAGE = "logo";
private static final String SUBTITLE = "subtitle";
private static final String PUBLISHED = "published";
private static final String TEXT_TYPE = "type";
// Link
private static final String LINK_HREF = "href";
@ -38,9 +38,11 @@ public class NSAtom extends Namespace {
private static final String LINK_REL_ENCLOSURE = "enclosure";
private static final String LINK_REL_RELATED = "related";
private static final String LINK_REL_SELF = "self";
/** Regexp to test whether an Element is a Text Element. */
private static final String isText = TITLE + "|" + CONTENT + "|" + "|" + SUBTITLE;
private static final String isText = TITLE + "|" + CONTENT + "|" + "|"
+ SUBTITLE;
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
Attributes attributes) {
@ -61,9 +63,11 @@ public class NSAtom extends Namespace {
} else if (rel.equals(LINK_REL_ENCLOSURE)) {
String strSize = attributes.getValue(LINK_LENGTH);
long size = 0;
if (strSize != null) size = Long.parseLong(strSize);
if (strSize != null)
size = Long.parseLong(strSize);
String type = attributes.getValue(LINK_TYPE);
String download_url = attributes.getValue(LINK_REL_ENCLOSURE);
String download_url = attributes
.getValue(LINK_REL_ENCLOSURE);
state.getCurrentItem().setMedia(
new FeedMedia(state.getCurrentItem(), download_url,
size, type));
@ -80,43 +84,7 @@ public class NSAtom extends Namespace {
/**
 * Handles one chunk of character data for the element on top of the tag stack.
 *
 * The chunk is turned into a String and dispatched on the name of the top
 * element and the name of the element below it:
 * TITLE under FEED sets the feed title, TITLE under ENTRY sets the current
 * item's title, SUBTITLE under FEED sets the feed description, CONTENT under
 * ENTRY sets the current item's description, PUBLISHED under ENTRY sets the
 * current item's publication date, and IMAGE sets a new FeedImage on the feed.
 * Nothing happens when fewer than two elements are on the tag stack.
 *
 * NOTE(review): only the characters passed to this single call are used. SAX
 * parsers may deliver the text of one element in several chunks - confirm
 * whether acting on each chunk by itself is intended.
 *
 * @param state  handler state providing the tag stack, feed and current item
 * @param ch     character array containing the chunk
 * @param start  index of the first character of the chunk in {@code ch}
 * @param length number of characters in the chunk
 */
@Override
public void handleCharacters(HandlerState state, char[] ch, int start,
int length) {
// Both the current element and the one below it are needed for the dispatch.
if (state.getTagstack().size() >= 2) {
AtomText textElement = null;
String content = new String(ch, start, length);
SyndElement topElement = state.getTagstack().peek();
String top = topElement.getName();
SyndElement secondElement = state.getSecondTag();
String second = secondElement.getName();
// Names matching the isText regexp are treated as AtomText elements and
// receive the chunk as their content before it is read back below.
if (top.matches(isText)) {
textElement = (AtomText) topElement;
textElement.setContent(content);
}
if (top.equals(TITLE)) {
if (second.equals(FEED)) {
state.getFeed().setTitle(textElement.getProcessedContent());
} else if (second.equals(ENTRY)) {
state.getCurrentItem().setTitle(textElement.getProcessedContent());
}
} else if (top.equals(SUBTITLE)) {
if (second.equals(FEED)) {
state.getFeed().setDescription(textElement.getProcessedContent());
}
} else if (top.equals(CONTENT)) {
if (second.equals(ENTRY)) {
state.getCurrentItem().setDescription(textElement.getProcessedContent());
}
} else if (top.equals(PUBLISHED)) {
if (second.equals(ENTRY)) {
// The raw chunk (not AtomText-processed content) is parsed as the date.
state.getCurrentItem().setPubDate(SyndDateUtils.parseRFC3339Date(content));
}
} else if (top.equals(IMAGE)) {
// Applied regardless of the second element's name.
state.getFeed().setImage(new FeedImage(content, null));
}
}
}
@Override
@ -124,6 +92,48 @@ public class NSAtom extends Namespace {
if (localName.equals(ENTRY)) {
state.setCurrentItem(null);
}
if (state.getTagstack().size() >= 2) {
AtomText textElement = null;
String content = state.getContentBuf().toString();
SyndElement topElement = state.getTagstack().peek();
String top = topElement.getName();
SyndElement secondElement = state.getSecondTag();
String second = secondElement.getName();
if (top.matches(isText)) {
textElement = (AtomText) topElement;
textElement.setContent(content);
}
if (top.equals(TITLE)) {
if (second.equals(FEED)) {
state.getFeed().setTitle(textElement.getProcessedContent());
} else if (second.equals(ENTRY)) {
state.getCurrentItem().setTitle(
textElement.getProcessedContent());
}
} else if (top.equals(SUBTITLE)) {
if (second.equals(FEED)) {
state.getFeed().setDescription(
textElement.getProcessedContent());
}
} else if (top.equals(CONTENT)) {
if (second.equals(ENTRY)) {
state.getCurrentItem().setDescription(
textElement.getProcessedContent());
}
} else if (top.equals(PUBLISHED)) {
if (second.equals(ENTRY)) {
state.getCurrentItem().setPubDate(
SyndDateUtils.parseRFC3339Date(content));
}
} else if (top.equals(IMAGE)) {
state.getFeed().setImage(new FeedImage(content, null));
}
}
}
}

View File

@ -14,38 +14,22 @@ public class NSContent extends Namespace {
private static final String ENCODED = "encoded";
private StringBuffer encoded;
/**
 * Handles the start of an element in this namespace.
 *
 * When the element's local name equals ENCODED, a fresh, empty
 * {@code encoded} buffer is allocated.
 *
 * @param localName  local name of the element that was opened
 * @param state      current handler state (not read by this method)
 * @param attributes attributes of the element (not read by this method)
 * @return a new SyndElement created from {@code localName} and this namespace
 */
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
Attributes attributes) {
if (localName.equals(ENCODED)) {
encoded = new StringBuffer();
}
return new SyndElement(localName, this);
}
/**
 * Appends a chunk of character data to the {@code encoded} buffer.
 *
 * The chunk is appended only when at least two elements are on the tag stack,
 * the top element is named ENCODED and the element below it is named
 * NSRSS20.ITEM; otherwise the chunk is ignored.
 *
 * @param state  handler state providing the tag stack
 * @param ch     character array containing the chunk
 * @param start  index of the first character of the chunk in {@code ch}
 * @param length number of characters in the chunk
 */
@Override
public void handleCharacters(HandlerState state, char[] ch, int start,
int length) {
// The parent element is needed to check that ENCODED sits inside an item.
if (state.getTagstack().size() >= 2) {
String content = new String(ch, start, length);
SyndElement topElement = state.getTagstack().peek();
String top = topElement.getName();
SyndElement secondElement = state.getSecondTag();
String second = secondElement.getName();
if (top.equals(ENCODED) && second.equals(NSRSS20.ITEM)) {
encoded.append(content);
}
}
}
/**
 * Handles the end of an element in this namespace.
 *
 * When the closed element is named ENCODED, the collected text is passed
 * through StringEscapeUtils.unescapeHtml4 and stored on the current item via
 * setContentEncoded. Other elements are ignored.
 *
 * NOTE(review): two consecutive setContentEncoded calls appear below - one
 * reading the local {@code encoded} buffer, one reading the handler state's
 * content buffer. This looks like the before/after lines of a change shown
 * together; confirm which call is meant to remain.
 *
 * @param localName local name of the element that was closed
 * @param state     handler state providing the current item and content buffer
 */
@Override
public void handleElementEnd(String localName, HandlerState state) {
if (localName.equals(ENCODED)) {
state.getCurrentItem().setContentEncoded(StringEscapeUtils.unescapeHtml4(encoded.toString()));
encoded = null;
state.getCurrentItem().setContentEncoded(StringEscapeUtils.unescapeHtml4(state.getContentBuf().toString()));
}
}

View File

@ -26,7 +26,7 @@ import org.xml.sax.helpers.DefaultHandler;
public class NSRSS20 extends Namespace {
public static final String NSTAG = "rss";
public static final String NSURI = "";
public final static String CHANNEL = "channel";
public final static String ITEM = "item";
public final static String TITLE = "title";
@ -41,8 +41,6 @@ public class NSRSS20 extends Namespace {
public final static String ENC_LEN = "length";
public final static String ENC_TYPE = "type";
private StringBuffer descriptionBuf;
@Override
public SyndElement handleElementStart(String localName, HandlerState state,
Attributes attributes) {
@ -53,13 +51,13 @@ public class NSRSS20 extends Namespace {
} else if (localName.equals(ENCLOSURE)) {
state.getCurrentItem()
.setMedia(new FeedMedia(state.getCurrentItem(), attributes
.getValue(ENC_URL), Long.parseLong(attributes
.getValue(ENC_LEN)), attributes.getValue(ENC_TYPE)));
.setMedia(
new FeedMedia(state.getCurrentItem(), attributes
.getValue(ENC_URL), Long
.parseLong(attributes.getValue(ENC_LEN)),
attributes.getValue(ENC_TYPE)));
} else if (localName.equals(IMAGE)) {
state.getFeed().setImage(new FeedImage());
} else if (localName.equals(DESCR)) {
descriptionBuf = new StringBuffer();
}
return new SyndElement(localName, this);
}
@ -67,8 +65,14 @@ public class NSRSS20 extends Namespace {
@Override
public void handleCharacters(HandlerState state, char[] ch, int start,
int length) {
if (state.getTagstack().size() >= 2) {
String content = new String(ch, start, length);
}
@Override
public void handleElementEnd(String localName, HandlerState state) {
if (localName.equals(ITEM)) {
state.setCurrentItem(null);
} else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) {
String content = state.getContentBuf().toString();
SyndElement topElement = state.getTagstack().peek();
String top = topElement.getName();
SyndElement secondElement = state.getSecondTag();
@ -81,8 +85,6 @@ public class NSRSS20 extends Namespace {
} else if (second.equals(IMAGE)) {
state.getFeed().getImage().setTitle(IMAGE);
}
} else if (top.equals(DESCR)) {
descriptionBuf.append(content);
} else if (top.equals(LINK)) {
if (second.equals(CHANNEL)) {
state.getFeed().setLink(content);
@ -90,27 +92,18 @@ public class NSRSS20 extends Namespace {
state.getCurrentItem().setLink(content);
}
} else if (top.equals(PUBDATE) && second.equals(ITEM)) {
state.getCurrentItem().setPubDate(SyndDateUtils.parseRFC822Date(content));
state.getCurrentItem().setPubDate(
SyndDateUtils.parseRFC822Date(content));
} else if (top.equals(URL) && second.equals(IMAGE)) {
state.getFeed().getImage().setDownload_url(content);
}
}
}
} else if (localName.equals(DESCR)) {
if (second.equals(CHANNEL)) {
state.getFeed().setDescription(content);
} else {
state.getCurrentItem().setDescription(content);
}
@Override
public void handleElementEnd(String localName, HandlerState state) {
if (localName.equals(ITEM)) {
state.setCurrentItem(null);
} else if (localName.equals(DESCR)) {
SyndElement secondElement = state.getSecondTag();
String second = secondElement.getName();
if (second.equals(CHANNEL)) {
state.getFeed().setDescription(descriptionBuf.toString());
} else {
state.getCurrentItem().setDescription(descriptionBuf.toString());
}
descriptionBuf = null;
}
}