From e3115f1ae4f89b75fecf3c5e3763b4b38cc09899 Mon Sep 17 00:00:00 2001 From: daniel oeh Date: Sun, 24 Jun 2012 17:08:16 +0200 Subject: [PATCH] Switched to StringBuffer-method for getting characters in Feedparser --- .../syndication/handler/HandlerState.java | 8 ++ .../syndication/handler/SyndHandler.java | 9 ++ .../syndication/namespace/atom/NSAtom.java | 94 ++++++++++--------- .../namespace/content/NSContent.java | 18 +--- .../syndication/namespace/rss20/NSRSS20.java | 51 +++++----- 5 files changed, 92 insertions(+), 88 deletions(-) diff --git a/src/de/podfetcher/syndication/handler/HandlerState.java b/src/de/podfetcher/syndication/handler/HandlerState.java index a95cae6f0..4488fd8fa 100644 --- a/src/de/podfetcher/syndication/handler/HandlerState.java +++ b/src/de/podfetcher/syndication/handler/HandlerState.java @@ -11,6 +11,7 @@ import de.podfetcher.feed.FeedItem; /** Contains all relevant information to describe the current state of a SyndHandler.*/ public class HandlerState { + /** Feed that the Handler is currently processing. */ protected Feed feed; protected FeedItem currentItem; @@ -18,6 +19,8 @@ public class HandlerState { /** Namespaces that have been defined so far. */ protected HashMap namespaces; protected Stack defaultNamespaces; + /** Buffer for saving characters. */ + protected StringBuffer contentBuf; public HandlerState(Feed feed) { this.feed = feed; @@ -55,6 +58,11 @@ public class HandlerState { return second; } + public StringBuffer getContentBuf() { + return contentBuf; + } + + diff --git a/src/de/podfetcher/syndication/handler/SyndHandler.java b/src/de/podfetcher/syndication/handler/SyndHandler.java index 795e3a24a..3cecabd64 100644 --- a/src/de/podfetcher/syndication/handler/SyndHandler.java +++ b/src/de/podfetcher/syndication/handler/SyndHandler.java @@ -20,6 +20,7 @@ public class SyndHandler extends DefaultHandler { private static final String DEFAULT_PREFIX = ""; protected HandlerState state; + public SyndHandler(Feed feed, TypeGetter.Type type) { state = new HandlerState(feed); if (type == TypeGetter.Type.RSS20) { @@ -30,6 +31,7 @@ public class SyndHandler extends DefaultHandler { @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { + state.contentBuf = new StringBuffer(); Namespace handler = getHandlingNamespace(uri); if (handler != null) { SyndElement element = handler.handleElementStart(localName, state, @@ -43,6 +45,12 @@ public class SyndHandler extends DefaultHandler { public void characters(char[] ch, int start, int length) throws SAXException { if (!state.tagstack.empty()) { + if (state.getTagstack().size() >= 2) { + if (state.contentBuf != null) { + String content = new String(ch, start, length); + state.contentBuf.append(content); + } + } SyndElement top = state.tagstack.peek(); if (top.getNamespace() != null) { top.getNamespace().handleCharacters(state, ch, start, length); @@ -59,6 +67,7 @@ public class SyndHandler extends DefaultHandler { state.tagstack.pop(); } + state.contentBuf = null; } diff --git a/src/de/podfetcher/syndication/namespace/atom/NSAtom.java b/src/de/podfetcher/syndication/namespace/atom/NSAtom.java index ece9f8674..3005e8a6d 100644 --- a/src/de/podfetcher/syndication/namespace/atom/NSAtom.java +++ b/src/de/podfetcher/syndication/namespace/atom/NSAtom.java @@ -25,7 +25,7 @@ public class NSAtom extends Namespace { private static final String IMAGE = "logo"; private static final String SUBTITLE = "subtitle"; private static final String PUBLISHED = "published"; - + private static final String TEXT_TYPE = "type"; // Link private static final String LINK_HREF = "href"; @@ -38,9 +38,11 @@ public class NSAtom extends Namespace { private static final String LINK_REL_ENCLOSURE = "enclosure"; private static final String LINK_REL_RELATED = "related"; private static final String LINK_REL_SELF = "self"; - + /** Regexp to test whether an Element is a Text Element. */ - private static final String isText = TITLE + "|" + CONTENT + "|" + "|" + SUBTITLE; + private static final String isText = TITLE + "|" + CONTENT + "|" + "|" + + SUBTITLE; + @Override public SyndElement handleElementStart(String localName, HandlerState state, Attributes attributes) { @@ -61,9 +63,11 @@ public class NSAtom extends Namespace { } else if (rel.equals(LINK_REL_ENCLOSURE)) { String strSize = attributes.getValue(LINK_LENGTH); long size = 0; - if (strSize != null) size = Long.parseLong(strSize); + if (strSize != null) + size = Long.parseLong(strSize); String type = attributes.getValue(LINK_TYPE); - String download_url = attributes.getValue(LINK_REL_ENCLOSURE); + String download_url = attributes + .getValue(LINK_REL_ENCLOSURE); state.getCurrentItem().setMedia( new FeedMedia(state.getCurrentItem(), download_url, size, type)); @@ -80,43 +84,7 @@ public class NSAtom extends Namespace { @Override public void handleCharacters(HandlerState state, char[] ch, int start, int length) { - if (state.getTagstack().size() >= 2) { - AtomText textElement = null; - String content = new String(ch, start, length); - SyndElement topElement = state.getTagstack().peek(); - String top = topElement.getName(); - SyndElement secondElement = state.getSecondTag(); - String second = secondElement.getName(); - - if (top.matches(isText)) { - textElement = (AtomText) topElement; - textElement.setContent(content); - } - - if (top.equals(TITLE)) { - - if (second.equals(FEED)) { - state.getFeed().setTitle(textElement.getProcessedContent()); - } else if (second.equals(ENTRY)) { - state.getCurrentItem().setTitle(textElement.getProcessedContent()); - } - } else if (top.equals(SUBTITLE)) { - if (second.equals(FEED)) { - state.getFeed().setDescription(textElement.getProcessedContent()); - } - } else if (top.equals(CONTENT)) { - if (second.equals(ENTRY)) { - state.getCurrentItem().setDescription(textElement.getProcessedContent()); - } - } else if (top.equals(PUBLISHED)) { - if (second.equals(ENTRY)) { - state.getCurrentItem().setPubDate(SyndDateUtils.parseRFC3339Date(content)); - } - } else if (top.equals(IMAGE)) { - state.getFeed().setImage(new FeedImage(content, null)); - } - - } + } @Override @@ -124,6 +92,48 @@ public class NSAtom extends Namespace { if (localName.equals(ENTRY)) { state.setCurrentItem(null); } + + if (state.getTagstack().size() >= 2) { + AtomText textElement = null; + String content = state.getContentBuf().toString(); + SyndElement topElement = state.getTagstack().peek(); + String top = topElement.getName(); + SyndElement secondElement = state.getSecondTag(); + String second = secondElement.getName(); + + if (top.matches(isText)) { + textElement = (AtomText) topElement; + textElement.setContent(content); + } + + if (top.equals(TITLE)) { + + if (second.equals(FEED)) { + state.getFeed().setTitle(textElement.getProcessedContent()); + } else if (second.equals(ENTRY)) { + state.getCurrentItem().setTitle( + textElement.getProcessedContent()); + } + } else if (top.equals(SUBTITLE)) { + if (second.equals(FEED)) { + state.getFeed().setDescription( + textElement.getProcessedContent()); + } + } else if (top.equals(CONTENT)) { + if (second.equals(ENTRY)) { + state.getCurrentItem().setDescription( + textElement.getProcessedContent()); + } + } else if (top.equals(PUBLISHED)) { + if (second.equals(ENTRY)) { + state.getCurrentItem().setPubDate( + SyndDateUtils.parseRFC3339Date(content)); + } + } else if (top.equals(IMAGE)) { + state.getFeed().setImage(new FeedImage(content, null)); + } + + } } } diff --git a/src/de/podfetcher/syndication/namespace/content/NSContent.java b/src/de/podfetcher/syndication/namespace/content/NSContent.java index d9f3501c9..d5dba9101 100644 --- a/src/de/podfetcher/syndication/namespace/content/NSContent.java +++ b/src/de/podfetcher/syndication/namespace/content/NSContent.java @@ -14,38 +14,22 @@ public class NSContent extends Namespace { private static final String ENCODED = "encoded"; - private StringBuffer encoded; - @Override public SyndElement handleElementStart(String localName, HandlerState state, Attributes attributes) { - if (localName.equals(ENCODED)) { - encoded = new StringBuffer(); - } return new SyndElement(localName, this); } @Override public void handleCharacters(HandlerState state, char[] ch, int start, int length) { - if (state.getTagstack().size() >= 2) { - String content = new String(ch, start, length); - SyndElement topElement = state.getTagstack().peek(); - String top = topElement.getName(); - SyndElement secondElement = state.getSecondTag(); - String second = secondElement.getName(); - if (top.equals(ENCODED) && second.equals(NSRSS20.ITEM)) { - encoded.append(content); - } - } } @Override public void handleElementEnd(String localName, HandlerState state) { if (localName.equals(ENCODED)) { - state.getCurrentItem().setContentEncoded(StringEscapeUtils.unescapeHtml4(encoded.toString())); - encoded = null; + state.getCurrentItem().setContentEncoded(StringEscapeUtils.unescapeHtml4(state.getContentBuf().toString())); } } diff --git a/src/de/podfetcher/syndication/namespace/rss20/NSRSS20.java b/src/de/podfetcher/syndication/namespace/rss20/NSRSS20.java index fc3a3927b..f245882ad 100644 --- a/src/de/podfetcher/syndication/namespace/rss20/NSRSS20.java +++ b/src/de/podfetcher/syndication/namespace/rss20/NSRSS20.java @@ -26,7 +26,7 @@ import org.xml.sax.helpers.DefaultHandler; public class NSRSS20 extends Namespace { public static final String NSTAG = "rss"; public static final String NSURI = ""; - + public final static String CHANNEL = "channel"; public final static String ITEM = "item"; public final static String TITLE = "title"; @@ -41,8 +41,6 @@ public class NSRSS20 extends Namespace { public final static String ENC_LEN = "length"; public final static String ENC_TYPE = "type"; - private StringBuffer descriptionBuf; - @Override public SyndElement handleElementStart(String localName, HandlerState state, Attributes attributes) { @@ -53,13 +51,13 @@ public class NSRSS20 extends Namespace { } else if (localName.equals(ENCLOSURE)) { state.getCurrentItem() - .setMedia(new FeedMedia(state.getCurrentItem(), attributes - .getValue(ENC_URL), Long.parseLong(attributes - .getValue(ENC_LEN)), attributes.getValue(ENC_TYPE))); + .setMedia( + new FeedMedia(state.getCurrentItem(), attributes + .getValue(ENC_URL), Long + .parseLong(attributes.getValue(ENC_LEN)), + attributes.getValue(ENC_TYPE))); } else if (localName.equals(IMAGE)) { state.getFeed().setImage(new FeedImage()); - } else if (localName.equals(DESCR)) { - descriptionBuf = new StringBuffer(); } return new SyndElement(localName, this); } @@ -67,8 +65,14 @@ public class NSRSS20 extends Namespace { @Override public void handleCharacters(HandlerState state, char[] ch, int start, int length) { - if (state.getTagstack().size() >= 2) { - String content = new String(ch, start, length); + } + + @Override + public void handleElementEnd(String localName, HandlerState state) { + if (localName.equals(ITEM)) { + state.setCurrentItem(null); + } else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) { + String content = state.getContentBuf().toString(); SyndElement topElement = state.getTagstack().peek(); String top = topElement.getName(); SyndElement secondElement = state.getSecondTag(); @@ -81,8 +85,6 @@ public class NSRSS20 extends Namespace { } else if (second.equals(IMAGE)) { state.getFeed().getImage().setTitle(IMAGE); } - } else if (top.equals(DESCR)) { - descriptionBuf.append(content); } else if (top.equals(LINK)) { if (second.equals(CHANNEL)) { state.getFeed().setLink(content); @@ -90,27 +92,18 @@ public class NSRSS20 extends Namespace { state.getCurrentItem().setLink(content); } } else if (top.equals(PUBDATE) && second.equals(ITEM)) { - state.getCurrentItem().setPubDate(SyndDateUtils.parseRFC822Date(content)); + state.getCurrentItem().setPubDate( + SyndDateUtils.parseRFC822Date(content)); } else if (top.equals(URL) && second.equals(IMAGE)) { state.getFeed().getImage().setDownload_url(content); - } - } - - } + } else if (localName.equals(DESCR)) { + if (second.equals(CHANNEL)) { + state.getFeed().setDescription(content); + } else { + state.getCurrentItem().setDescription(content); + } - @Override - public void handleElementEnd(String localName, HandlerState state) { - if (localName.equals(ITEM)) { - state.setCurrentItem(null); - } else if (localName.equals(DESCR)) { - SyndElement secondElement = state.getSecondTag(); - String second = secondElement.getName(); - if (second.equals(CHANNEL)) { - state.getFeed().setDescription(descriptionBuf.toString()); - } else { - state.getCurrentItem().setDescription(descriptionBuf.toString()); } - descriptionBuf = null; } }