Switched to a StringBuffer-based approach for collecting character data in the feed parser
This commit is contained in:
parent
e128e6fd6e
commit
e3115f1ae4
|
@ -11,6 +11,7 @@ import de.podfetcher.feed.FeedItem;
|
|||
|
||||
/** Contains all relevant information to describe the current state of a SyndHandler.*/
|
||||
public class HandlerState {
|
||||
|
||||
/** Feed that the Handler is currently processing. */
|
||||
protected Feed feed;
|
||||
protected FeedItem currentItem;
|
||||
|
@ -18,6 +19,8 @@ public class HandlerState {
|
|||
/** Namespaces that have been defined so far. */
|
||||
protected HashMap<String, Namespace> namespaces;
|
||||
protected Stack<Namespace> defaultNamespaces;
|
||||
/** Buffer for saving characters. */
|
||||
protected StringBuffer contentBuf;
|
||||
|
||||
public HandlerState(Feed feed) {
|
||||
this.feed = feed;
|
||||
|
@ -55,6 +58,11 @@ public class HandlerState {
|
|||
return second;
|
||||
}
|
||||
|
||||
public StringBuffer getContentBuf() {
|
||||
return contentBuf;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ public class SyndHandler extends DefaultHandler {
|
|||
private static final String DEFAULT_PREFIX = "";
|
||||
protected HandlerState state;
|
||||
|
||||
|
||||
public SyndHandler(Feed feed, TypeGetter.Type type) {
|
||||
state = new HandlerState(feed);
|
||||
if (type == TypeGetter.Type.RSS20) {
|
||||
|
@ -30,6 +31,7 @@ public class SyndHandler extends DefaultHandler {
|
|||
@Override
|
||||
public void startElement(String uri, String localName, String qName,
|
||||
Attributes attributes) throws SAXException {
|
||||
state.contentBuf = new StringBuffer();
|
||||
Namespace handler = getHandlingNamespace(uri);
|
||||
if (handler != null) {
|
||||
SyndElement element = handler.handleElementStart(localName, state,
|
||||
|
@ -43,6 +45,12 @@ public class SyndHandler extends DefaultHandler {
|
|||
public void characters(char[] ch, int start, int length)
|
||||
throws SAXException {
|
||||
if (!state.tagstack.empty()) {
|
||||
if (state.getTagstack().size() >= 2) {
|
||||
if (state.contentBuf != null) {
|
||||
String content = new String(ch, start, length);
|
||||
state.contentBuf.append(content);
|
||||
}
|
||||
}
|
||||
SyndElement top = state.tagstack.peek();
|
||||
if (top.getNamespace() != null) {
|
||||
top.getNamespace().handleCharacters(state, ch, start, length);
|
||||
|
@ -59,6 +67,7 @@ public class SyndHandler extends DefaultHandler {
|
|||
state.tagstack.pop();
|
||||
|
||||
}
|
||||
state.contentBuf = null;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ public class NSAtom extends Namespace {
|
|||
private static final String IMAGE = "logo";
|
||||
private static final String SUBTITLE = "subtitle";
|
||||
private static final String PUBLISHED = "published";
|
||||
|
||||
|
||||
private static final String TEXT_TYPE = "type";
|
||||
// Link
|
||||
private static final String LINK_HREF = "href";
|
||||
|
@ -38,9 +38,11 @@ public class NSAtom extends Namespace {
|
|||
private static final String LINK_REL_ENCLOSURE = "enclosure";
|
||||
private static final String LINK_REL_RELATED = "related";
|
||||
private static final String LINK_REL_SELF = "self";
|
||||
|
||||
|
||||
/** Regexp to test whether an Element is a Text Element. */
|
||||
private static final String isText = TITLE + "|" + CONTENT + "|" + "|" + SUBTITLE;
|
||||
private static final String isText = TITLE + "|" + CONTENT + "|" + "|"
|
||||
+ SUBTITLE;
|
||||
|
||||
@Override
|
||||
public SyndElement handleElementStart(String localName, HandlerState state,
|
||||
Attributes attributes) {
|
||||
|
@ -61,9 +63,11 @@ public class NSAtom extends Namespace {
|
|||
} else if (rel.equals(LINK_REL_ENCLOSURE)) {
|
||||
String strSize = attributes.getValue(LINK_LENGTH);
|
||||
long size = 0;
|
||||
if (strSize != null) size = Long.parseLong(strSize);
|
||||
if (strSize != null)
|
||||
size = Long.parseLong(strSize);
|
||||
String type = attributes.getValue(LINK_TYPE);
|
||||
String download_url = attributes.getValue(LINK_REL_ENCLOSURE);
|
||||
String download_url = attributes
|
||||
.getValue(LINK_REL_ENCLOSURE);
|
||||
state.getCurrentItem().setMedia(
|
||||
new FeedMedia(state.getCurrentItem(), download_url,
|
||||
size, type));
|
||||
|
@ -80,43 +84,7 @@ public class NSAtom extends Namespace {
|
|||
@Override
|
||||
public void handleCharacters(HandlerState state, char[] ch, int start,
|
||||
int length) {
|
||||
if (state.getTagstack().size() >= 2) {
|
||||
AtomText textElement = null;
|
||||
String content = new String(ch, start, length);
|
||||
SyndElement topElement = state.getTagstack().peek();
|
||||
String top = topElement.getName();
|
||||
SyndElement secondElement = state.getSecondTag();
|
||||
String second = secondElement.getName();
|
||||
|
||||
if (top.matches(isText)) {
|
||||
textElement = (AtomText) topElement;
|
||||
textElement.setContent(content);
|
||||
}
|
||||
|
||||
if (top.equals(TITLE)) {
|
||||
|
||||
if (second.equals(FEED)) {
|
||||
state.getFeed().setTitle(textElement.getProcessedContent());
|
||||
} else if (second.equals(ENTRY)) {
|
||||
state.getCurrentItem().setTitle(textElement.getProcessedContent());
|
||||
}
|
||||
} else if (top.equals(SUBTITLE)) {
|
||||
if (second.equals(FEED)) {
|
||||
state.getFeed().setDescription(textElement.getProcessedContent());
|
||||
}
|
||||
} else if (top.equals(CONTENT)) {
|
||||
if (second.equals(ENTRY)) {
|
||||
state.getCurrentItem().setDescription(textElement.getProcessedContent());
|
||||
}
|
||||
} else if (top.equals(PUBLISHED)) {
|
||||
if (second.equals(ENTRY)) {
|
||||
state.getCurrentItem().setPubDate(SyndDateUtils.parseRFC3339Date(content));
|
||||
}
|
||||
} else if (top.equals(IMAGE)) {
|
||||
state.getFeed().setImage(new FeedImage(content, null));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -124,6 +92,48 @@ public class NSAtom extends Namespace {
|
|||
if (localName.equals(ENTRY)) {
|
||||
state.setCurrentItem(null);
|
||||
}
|
||||
|
||||
if (state.getTagstack().size() >= 2) {
|
||||
AtomText textElement = null;
|
||||
String content = state.getContentBuf().toString();
|
||||
SyndElement topElement = state.getTagstack().peek();
|
||||
String top = topElement.getName();
|
||||
SyndElement secondElement = state.getSecondTag();
|
||||
String second = secondElement.getName();
|
||||
|
||||
if (top.matches(isText)) {
|
||||
textElement = (AtomText) topElement;
|
||||
textElement.setContent(content);
|
||||
}
|
||||
|
||||
if (top.equals(TITLE)) {
|
||||
|
||||
if (second.equals(FEED)) {
|
||||
state.getFeed().setTitle(textElement.getProcessedContent());
|
||||
} else if (second.equals(ENTRY)) {
|
||||
state.getCurrentItem().setTitle(
|
||||
textElement.getProcessedContent());
|
||||
}
|
||||
} else if (top.equals(SUBTITLE)) {
|
||||
if (second.equals(FEED)) {
|
||||
state.getFeed().setDescription(
|
||||
textElement.getProcessedContent());
|
||||
}
|
||||
} else if (top.equals(CONTENT)) {
|
||||
if (second.equals(ENTRY)) {
|
||||
state.getCurrentItem().setDescription(
|
||||
textElement.getProcessedContent());
|
||||
}
|
||||
} else if (top.equals(PUBLISHED)) {
|
||||
if (second.equals(ENTRY)) {
|
||||
state.getCurrentItem().setPubDate(
|
||||
SyndDateUtils.parseRFC3339Date(content));
|
||||
}
|
||||
} else if (top.equals(IMAGE)) {
|
||||
state.getFeed().setImage(new FeedImage(content, null));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -14,38 +14,22 @@ public class NSContent extends Namespace {
|
|||
|
||||
private static final String ENCODED = "encoded";
|
||||
|
||||
private StringBuffer encoded;
|
||||
|
||||
@Override
|
||||
public SyndElement handleElementStart(String localName, HandlerState state,
|
||||
Attributes attributes) {
|
||||
if (localName.equals(ENCODED)) {
|
||||
encoded = new StringBuffer();
|
||||
}
|
||||
return new SyndElement(localName, this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleCharacters(HandlerState state, char[] ch, int start,
|
||||
int length) {
|
||||
if (state.getTagstack().size() >= 2) {
|
||||
String content = new String(ch, start, length);
|
||||
SyndElement topElement = state.getTagstack().peek();
|
||||
String top = topElement.getName();
|
||||
SyndElement secondElement = state.getSecondTag();
|
||||
String second = secondElement.getName();
|
||||
if (top.equals(ENCODED) && second.equals(NSRSS20.ITEM)) {
|
||||
encoded.append(content);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Handles a closing tag of this namespace: when the tag is named
 * {@code ENCODED}, the collected text is passed through
 * {@code StringEscapeUtils.unescapeHtml4} and stored as the encoded content
 * of the current item.
 *
 * NOTE(review): the body sets the encoded content twice in a row, first from
 * the local {@code encoded} buffer and then from
 * {@code state.getContentBuf()}, so the second call overwrites the first.
 * This looks like the removed and the added line of a diff rendered
 * together; confirm which statement is meant to remain.
 *
 * @param localName local name of the element that ends
 * @param state     parser state providing the current item and the content buffer
 */
@Override
|
||||
public void handleElementEnd(String localName, HandlerState state) {
|
||||
if (localName.equals(ENCODED)) {
|
||||
// Value taken from the namespace-local buffer.
state.getCurrentItem().setContentEncoded(StringEscapeUtils.unescapeHtml4(encoded.toString()));
|
||||
// Drop the local buffer once it has been read.
encoded = null;
|
||||
// Value taken from the state's content buffer; replaces the one set above.
state.getCurrentItem().setContentEncoded(StringEscapeUtils.unescapeHtml4(state.getContentBuf().toString()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.xml.sax.helpers.DefaultHandler;
|
|||
public class NSRSS20 extends Namespace {
|
||||
public static final String NSTAG = "rss";
|
||||
public static final String NSURI = "";
|
||||
|
||||
|
||||
public final static String CHANNEL = "channel";
|
||||
public final static String ITEM = "item";
|
||||
public final static String TITLE = "title";
|
||||
|
@ -41,8 +41,6 @@ public class NSRSS20 extends Namespace {
|
|||
public final static String ENC_LEN = "length";
|
||||
public final static String ENC_TYPE = "type";
|
||||
|
||||
private StringBuffer descriptionBuf;
|
||||
|
||||
@Override
|
||||
public SyndElement handleElementStart(String localName, HandlerState state,
|
||||
Attributes attributes) {
|
||||
|
@ -53,13 +51,13 @@ public class NSRSS20 extends Namespace {
|
|||
|
||||
} else if (localName.equals(ENCLOSURE)) {
|
||||
state.getCurrentItem()
|
||||
.setMedia(new FeedMedia(state.getCurrentItem(), attributes
|
||||
.getValue(ENC_URL), Long.parseLong(attributes
|
||||
.getValue(ENC_LEN)), attributes.getValue(ENC_TYPE)));
|
||||
.setMedia(
|
||||
new FeedMedia(state.getCurrentItem(), attributes
|
||||
.getValue(ENC_URL), Long
|
||||
.parseLong(attributes.getValue(ENC_LEN)),
|
||||
attributes.getValue(ENC_TYPE)));
|
||||
} else if (localName.equals(IMAGE)) {
|
||||
state.getFeed().setImage(new FeedImage());
|
||||
} else if (localName.equals(DESCR)) {
|
||||
descriptionBuf = new StringBuffer();
|
||||
}
|
||||
return new SyndElement(localName, this);
|
||||
}
|
||||
|
@ -67,8 +65,14 @@ public class NSRSS20 extends Namespace {
|
|||
@Override
|
||||
public void handleCharacters(HandlerState state, char[] ch, int start,
|
||||
int length) {
|
||||
if (state.getTagstack().size() >= 2) {
|
||||
String content = new String(ch, start, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleElementEnd(String localName, HandlerState state) {
|
||||
if (localName.equals(ITEM)) {
|
||||
state.setCurrentItem(null);
|
||||
} else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) {
|
||||
String content = state.getContentBuf().toString();
|
||||
SyndElement topElement = state.getTagstack().peek();
|
||||
String top = topElement.getName();
|
||||
SyndElement secondElement = state.getSecondTag();
|
||||
|
@ -81,8 +85,6 @@ public class NSRSS20 extends Namespace {
|
|||
} else if (second.equals(IMAGE)) {
|
||||
state.getFeed().getImage().setTitle(IMAGE);
|
||||
}
|
||||
} else if (top.equals(DESCR)) {
|
||||
descriptionBuf.append(content);
|
||||
} else if (top.equals(LINK)) {
|
||||
if (second.equals(CHANNEL)) {
|
||||
state.getFeed().setLink(content);
|
||||
|
@ -90,27 +92,18 @@ public class NSRSS20 extends Namespace {
|
|||
state.getCurrentItem().setLink(content);
|
||||
}
|
||||
} else if (top.equals(PUBDATE) && second.equals(ITEM)) {
|
||||
state.getCurrentItem().setPubDate(SyndDateUtils.parseRFC822Date(content));
|
||||
state.getCurrentItem().setPubDate(
|
||||
SyndDateUtils.parseRFC822Date(content));
|
||||
} else if (top.equals(URL) && second.equals(IMAGE)) {
|
||||
state.getFeed().getImage().setDownload_url(content);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} else if (localName.equals(DESCR)) {
|
||||
if (second.equals(CHANNEL)) {
|
||||
state.getFeed().setDescription(content);
|
||||
} else {
|
||||
state.getCurrentItem().setDescription(content);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleElementEnd(String localName, HandlerState state) {
|
||||
if (localName.equals(ITEM)) {
|
||||
state.setCurrentItem(null);
|
||||
} else if (localName.equals(DESCR)) {
|
||||
SyndElement secondElement = state.getSecondTag();
|
||||
String second = secondElement.getName();
|
||||
if (second.equals(CHANNEL)) {
|
||||
state.getFeed().setDescription(descriptionBuf.toString());
|
||||
} else {
|
||||
state.getCurrentItem().setDescription(descriptionBuf.toString());
|
||||
}
|
||||
descriptionBuf = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue