Implemented FeedDiscoverer
This commit is contained in:
parent
859eabb7a3
commit
7fc0e73ea7
|
@ -0,0 +1,78 @@
|
|||
package de.danoeh.antennapod.util.syndication;
|
||||
|
||||
import android.net.Uri;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Finds RSS/Atom URLs in a HTML document using the auto-discovery techniques described here:
|
||||
* <p/>
|
||||
* http://www.rssboard.org/rss-autodiscovery
|
||||
* <p/>
|
||||
* http://blog.whatwg.org/feed-autodiscovery
|
||||
*/
|
||||
public class FeedDiscoverer {
|
||||
|
||||
private static final String MIME_RSS = "application/rss+xml";
|
||||
private static final String MIME_ATOM = "application/atom+xml";
|
||||
|
||||
/**
|
||||
* Discovers links to RSS and Atom feeds in the given File which must be a HTML document.
|
||||
*
|
||||
* @return A map which contains the feed URLs as keys and titles as values (the feed URL is also used as a title if
|
||||
* a title cannot be found).
|
||||
*/
|
||||
public Map<String, String> findLinks(File in, String baseUrl) throws IOException {
|
||||
return findLinks(Jsoup.parse(in, null), baseUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Discovers links to RSS and Atom feeds in the given File which must be a HTML document.
|
||||
*
|
||||
* @return A map which contains the feed URLs as keys and titles as values (the feed URL is also used as a title if
|
||||
* a title cannot be found).
|
||||
*/
|
||||
public Map<String, String> findLinks(String in, String baseUrl) throws IOException {
|
||||
return findLinks(Jsoup.parse(in), baseUrl);
|
||||
}
|
||||
|
||||
private Map<String, String> findLinks(Document document, String baseUrl) {
|
||||
Map<String, String> res = new LinkedHashMap<String, String>();
|
||||
Elements links = document.head().getElementsByTag("link");
|
||||
for (Element link : links) {
|
||||
String rel = link.attr("rel");
|
||||
String href = link.attr("href");
|
||||
if (!StringUtils.isEmpty(href) &&
|
||||
(rel.equals("alternate") || rel.equals("feed"))) {
|
||||
String type = link.attr("type");
|
||||
if (type.equals(MIME_RSS) || type.equals(MIME_ATOM)) {
|
||||
String title = link.attr("title");
|
||||
String processedUrl = processURL(baseUrl, href);
|
||||
if (processedUrl != null) {
|
||||
res.put(processedUrl,
|
||||
(StringUtils.isEmpty(title)) ? href : title);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
private String processURL(String baseUrl, String strUrl) {
|
||||
Uri uri = Uri.parse(strUrl);
|
||||
if (uri.isRelative()) {
|
||||
Uri res = Uri.parse(baseUrl).buildUpon().path(strUrl).build();
|
||||
return (res != null) ? res.toString() : null;
|
||||
} else {
|
||||
return strUrl;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
package instrumentationTest.de.test.antennapod.util.syndication;
|
||||
|
||||
import android.test.InstrumentationTestCase;
|
||||
import de.danoeh.antennapod.util.syndication.FeedDiscoverer;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Test class for FeedDiscoverer
|
||||
*/
|
||||
public class FeedDiscovererTest extends InstrumentationTestCase {
|
||||
|
||||
private FeedDiscoverer fd;
|
||||
|
||||
private File testDir;
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
fd = new FeedDiscoverer();
|
||||
testDir = getInstrumentation().getTargetContext().getExternalFilesDir("FeedDiscovererTest");
|
||||
testDir.mkdir();
|
||||
assertTrue(testDir.exists());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void tearDown() throws Exception {
|
||||
FileUtils.deleteDirectory(testDir);
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
private String createTestHtmlString(String rel, String type, String href, String title) {
|
||||
return String.format("<html><head><title>Test</title><link rel=\"%s\" type=\"%s\" href=\"%s\" title=\"%s\"></head><body></body></html>",
|
||||
rel, type, href, title);
|
||||
}
|
||||
|
||||
private String createTestHtmlString(String rel, String type, String href) {
|
||||
return String.format("<html><head><title>Test</title><link rel=\"%s\" type=\"%s\" href=\"%s\"></head><body></body></html>",
|
||||
rel, type, href);
|
||||
}
|
||||
|
||||
private void checkFindUrls(boolean isAlternate, boolean isRss, boolean withTitle, boolean isAbsolute, boolean fromString) throws Exception {
|
||||
final String title = "Test title";
|
||||
final String hrefAbs = "http://example.com/feed";
|
||||
final String hrefRel = "/feed";
|
||||
final String base = "http://example.com";
|
||||
|
||||
final String rel = (isAlternate) ? "alternate" : "feed";
|
||||
final String type = (isRss) ? "application/rss+xml" : "application/atom+xml";
|
||||
final String href = (isAbsolute) ? hrefAbs : hrefRel;
|
||||
|
||||
Map<String, String> res;
|
||||
String html = (withTitle) ? createTestHtmlString(rel, type, href, title)
|
||||
: createTestHtmlString(rel, type, href);
|
||||
if (fromString) {
|
||||
res = fd.findLinks(html, base);
|
||||
} else {
|
||||
File testFile = new File(testDir, "feed");
|
||||
FileOutputStream out = new FileOutputStream(testFile);
|
||||
IOUtils.write(html, out);
|
||||
out.close();
|
||||
res = fd.findLinks(testFile, base);
|
||||
}
|
||||
|
||||
assertNotNull(res);
|
||||
assertEquals(1, res.size());
|
||||
for (String key : res.keySet()) {
|
||||
assertEquals(hrefAbs, key);
|
||||
}
|
||||
assertTrue(res.containsKey(hrefAbs));
|
||||
if (withTitle) {
|
||||
assertEquals(title, res.get(hrefAbs));
|
||||
} else {
|
||||
assertEquals(href, res.get(hrefAbs));
|
||||
}
|
||||
}
|
||||
|
||||
public void testAlternateRSSWithTitleAbsolute() throws Exception {
|
||||
checkFindUrls(true, true, true, true, true);
|
||||
}
|
||||
|
||||
public void testAlternateRSSWithTitleRelative() throws Exception {
|
||||
checkFindUrls(true, true, true, false, true);
|
||||
}
|
||||
|
||||
public void testAlternateRSSNoTitleAbsolute() throws Exception {
|
||||
checkFindUrls(true, true, false, true, true);
|
||||
}
|
||||
|
||||
public void testAlternateRSSNoTitleRelative() throws Exception {
|
||||
checkFindUrls(true, true, false, false, true);
|
||||
}
|
||||
|
||||
public void testAlternateAtomWithTitleAbsolute() throws Exception {
|
||||
checkFindUrls(true, false, true, true, true);
|
||||
}
|
||||
|
||||
public void testFeedAtomWithTitleAbsolute() throws Exception {
|
||||
checkFindUrls(false, false, true, true, true);
|
||||
}
|
||||
|
||||
public void testAlternateRSSWithTitleAbsoluteFromFile() throws Exception {
|
||||
checkFindUrls(true, true, true, true, false);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue