From 7fc0e73ea7bcf21f843a0d94426e8df515182271 Mon Sep 17 00:00:00 2001 From: daniel oeh Date: Mon, 16 Jun 2014 00:16:48 +0200 Subject: [PATCH] Implemented FeedDiscoverer --- .../util/syndication/FeedDiscoverer.java | 78 +++++++++++++ .../util/syndication/FeedDiscovererTest.java | 109 ++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java create mode 100644 src/instrumentationTest/de/test/antennapod/util/syndication/FeedDiscovererTest.java diff --git a/src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java b/src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java new file mode 100644 index 000000000..ac38ec876 --- /dev/null +++ b/src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java @@ -0,0 +1,78 @@ +package de.danoeh.antennapod.util.syndication; + +import android.net.Uri; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.File; +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Finds RSS/Atom URLs in a HTML document using the auto-discovery techniques described here: + *

+ * http://www.rssboard.org/rss-autodiscovery + *

+ * http://blog.whatwg.org/feed-autodiscovery + */ +public class FeedDiscoverer { + + private static final String MIME_RSS = "application/rss+xml"; + private static final String MIME_ATOM = "application/atom+xml"; + + /** + * Discovers links to RSS and Atom feeds in the given File which must be a HTML document. + * + * @return A map which contains the feed URLs as keys and titles as values (the feed URL is also used as a title if + * a title cannot be found). + */ + public Map findLinks(File in, String baseUrl) throws IOException { + return findLinks(Jsoup.parse(in, null), baseUrl); + } + + /** + * Discovers links to RSS and Atom feeds in the given File which must be a HTML document. + * + * @return A map which contains the feed URLs as keys and titles as values (the feed URL is also used as a title if + * a title cannot be found). + */ + public Map findLinks(String in, String baseUrl) throws IOException { + return findLinks(Jsoup.parse(in), baseUrl); + } + + private Map findLinks(Document document, String baseUrl) { + Map res = new LinkedHashMap(); + Elements links = document.head().getElementsByTag("link"); + for (Element link : links) { + String rel = link.attr("rel"); + String href = link.attr("href"); + if (!StringUtils.isEmpty(href) && + (rel.equals("alternate") || rel.equals("feed"))) { + String type = link.attr("type"); + if (type.equals(MIME_RSS) || type.equals(MIME_ATOM)) { + String title = link.attr("title"); + String processedUrl = processURL(baseUrl, href); + if (processedUrl != null) { + res.put(processedUrl, + (StringUtils.isEmpty(title)) ? href : title); + } + } + } + } + return res; + } + + private String processURL(String baseUrl, String strUrl) { + Uri uri = Uri.parse(strUrl); + if (uri.isRelative()) { + Uri res = Uri.parse(baseUrl).buildUpon().path(strUrl).build(); + return (res != null) ? res.toString() : null; + } else { + return strUrl; + } + } +} diff --git a/src/instrumentationTest/de/test/antennapod/util/syndication/FeedDiscovererTest.java b/src/instrumentationTest/de/test/antennapod/util/syndication/FeedDiscovererTest.java new file mode 100644 index 000000000..51a28089d --- /dev/null +++ b/src/instrumentationTest/de/test/antennapod/util/syndication/FeedDiscovererTest.java @@ -0,0 +1,109 @@ +package instrumentationTest.de.test.antennapod.util.syndication; + +import android.test.InstrumentationTestCase; +import de.danoeh.antennapod.util.syndication.FeedDiscoverer; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; + +import java.io.File; +import java.io.FileOutputStream; +import java.util.Map; + +/** + * Test class for FeedDiscoverer + */ +public class FeedDiscovererTest extends InstrumentationTestCase { + + private FeedDiscoverer fd; + + private File testDir; + + @Override + public void setUp() throws Exception { + super.setUp(); + fd = new FeedDiscoverer(); + testDir = getInstrumentation().getTargetContext().getExternalFilesDir("FeedDiscovererTest"); + testDir.mkdir(); + assertTrue(testDir.exists()); + } + + @Override + protected void tearDown() throws Exception { + FileUtils.deleteDirectory(testDir); + super.tearDown(); + } + + private String createTestHtmlString(String rel, String type, String href, String title) { + return String.format("Test", + rel, type, href, title); + } + + private String createTestHtmlString(String rel, String type, String href) { + return String.format("Test", + rel, type, href); + } + + private void checkFindUrls(boolean isAlternate, boolean isRss, boolean withTitle, boolean isAbsolute, boolean fromString) throws Exception { + final String title = "Test title"; + final String hrefAbs = "http://example.com/feed"; + final String hrefRel = "/feed"; + final String base = "http://example.com"; + + final String rel = (isAlternate) ? "alternate" : "feed"; + final String type = (isRss) ? "application/rss+xml" : "application/atom+xml"; + final String href = (isAbsolute) ? hrefAbs : hrefRel; + + Map res; + String html = (withTitle) ? createTestHtmlString(rel, type, href, title) + : createTestHtmlString(rel, type, href); + if (fromString) { + res = fd.findLinks(html, base); + } else { + File testFile = new File(testDir, "feed"); + FileOutputStream out = new FileOutputStream(testFile); + IOUtils.write(html, out); + out.close(); + res = fd.findLinks(testFile, base); + } + + assertNotNull(res); + assertEquals(1, res.size()); + for (String key : res.keySet()) { + assertEquals(hrefAbs, key); + } + assertTrue(res.containsKey(hrefAbs)); + if (withTitle) { + assertEquals(title, res.get(hrefAbs)); + } else { + assertEquals(href, res.get(hrefAbs)); + } + } + + public void testAlternateRSSWithTitleAbsolute() throws Exception { + checkFindUrls(true, true, true, true, true); + } + + public void testAlternateRSSWithTitleRelative() throws Exception { + checkFindUrls(true, true, true, false, true); + } + + public void testAlternateRSSNoTitleAbsolute() throws Exception { + checkFindUrls(true, true, false, true, true); + } + + public void testAlternateRSSNoTitleRelative() throws Exception { + checkFindUrls(true, true, false, false, true); + } + + public void testAlternateAtomWithTitleAbsolute() throws Exception { + checkFindUrls(true, false, true, true, true); + } + + public void testFeedAtomWithTitleAbsolute() throws Exception { + checkFindUrls(false, false, true, true, true); + } + + public void testAlternateRSSWithTitleAbsoluteFromFile() throws Exception { + checkFindUrls(true, true, true, true, false); + } +}