Add case for detecting <https://www.natashatherobot.com/feed/> as an RSS feed.

It’s missing the opening <rss> tag, but it has enough other distinct markers that we can detect it as RSS.

Add two tests to make sure it’s detected and that the parser handles it.
This commit is contained in:
Brent Simmons 2017-12-07 20:05:58 -08:00
parent cc857ef0b1
commit 3d72ba4b44
5 changed files with 1090 additions and 1 deletions

View File

@ -86,6 +86,7 @@
849A03E81F01F88600122600 /* ScriptingNews.json in Resources */ = {isa = PBXBuildFile; fileRef = 849A03E71F01F88600122600 /* ScriptingNews.json */; }; 849A03E81F01F88600122600 /* ScriptingNews.json in Resources */ = {isa = PBXBuildFile; fileRef = 849A03E71F01F88600122600 /* ScriptingNews.json */; };
849A03EA1F01F92B00122600 /* inessential.json in Resources */ = {isa = PBXBuildFile; fileRef = 849A03E91F01F92B00122600 /* inessential.json */; }; 849A03EA1F01F92B00122600 /* inessential.json in Resources */ = {isa = PBXBuildFile; fileRef = 849A03E91F01F92B00122600 /* inessential.json */; };
849A03EC1F01FCDC00122600 /* RSSInJSONParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 849A03EB1F01FCDC00122600 /* RSSInJSONParserTests.swift */; }; 849A03EC1F01FCDC00122600 /* RSSInJSONParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 849A03EB1F01FCDC00122600 /* RSSInJSONParserTests.swift */; };
84B19A771FDA438300458981 /* natasha.xml in Resources */ = {isa = PBXBuildFile; fileRef = 84B19A761FDA438300458981 /* natasha.xml */; };
84D81BDC1EFA28E700652332 /* RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84D81BDA1EFA28E700652332 /* RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 84D81BDC1EFA28E700652332 /* RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84D81BDA1EFA28E700652332 /* RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; };
84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */; }; 84D81BDE1EFA2B7D00652332 /* ParsedFeed.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */; };
84D81BE01EFA2BAE00652332 /* FeedType.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDF1EFA2BAE00652332 /* FeedType.swift */; }; 84D81BE01EFA2BAE00652332 /* FeedType.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D81BDF1EFA2BAE00652332 /* FeedType.swift */; };
@ -189,6 +190,7 @@
849A03E71F01F88600122600 /* ScriptingNews.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = ScriptingNews.json; sourceTree = "<group>"; }; 849A03E71F01F88600122600 /* ScriptingNews.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = ScriptingNews.json; sourceTree = "<group>"; };
849A03E91F01F92B00122600 /* inessential.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = inessential.json; sourceTree = "<group>"; }; 849A03E91F01F92B00122600 /* inessential.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = inessential.json; sourceTree = "<group>"; };
849A03EB1F01FCDC00122600 /* RSSInJSONParserTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = RSSInJSONParserTests.swift; sourceTree = "<group>"; }; 849A03EB1F01FCDC00122600 /* RSSInJSONParserTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = RSSInJSONParserTests.swift; sourceTree = "<group>"; };
84B19A761FDA438300458981 /* natasha.xml */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = natasha.xml; sourceTree = "<group>"; };
84D81BD91EFA28E700652332 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; 84D81BD91EFA28E700652332 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
84D81BDA1EFA28E700652332 /* RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParser.h; sourceTree = "<group>"; }; 84D81BDA1EFA28E700652332 /* RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParser.h; sourceTree = "<group>"; };
84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = ParsedFeed.swift; path = Feeds/ParsedFeed.swift; sourceTree = "<group>"; }; 84D81BDD1EFA2B7D00652332 /* ParsedFeed.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = ParsedFeed.swift; path = Feeds/ParsedFeed.swift; sourceTree = "<group>"; };
@ -360,6 +362,7 @@
849A03CA1F0081EA00122600 /* KatieFloyd.rss */, 849A03CA1F0081EA00122600 /* KatieFloyd.rss */,
848674D11FCE7BF500802D1F /* macworld.rss */, 848674D11FCE7BF500802D1F /* macworld.rss */,
849A03CB1F0081EA00122600 /* manton.rss */, 849A03CB1F0081EA00122600 /* manton.rss */,
84B19A761FDA438300458981 /* natasha.xml */,
849A03CC1F0081EA00122600 /* OneFootTsunami.atom */, 849A03CC1F0081EA00122600 /* OneFootTsunami.atom */,
849A03E71F01F88600122600 /* ScriptingNews.json */, 849A03E71F01F88600122600 /* ScriptingNews.json */,
849A03CD1F0081EA00122600 /* scriptingNews.rss */, 849A03CD1F0081EA00122600 /* scriptingNews.rss */,
@ -549,6 +552,7 @@
849A03DA1F0081EA00122600 /* Subs.opml in Resources */, 849A03DA1F0081EA00122600 /* Subs.opml in Resources */,
849A03D61F0081EA00122600 /* manton.rss in Resources */, 849A03D61F0081EA00122600 /* manton.rss in Resources */,
849A03D11F0081EA00122600 /* DaringFireball.rss in Resources */, 849A03D11F0081EA00122600 /* DaringFireball.rss in Resources */,
84B19A771FDA438300458981 /* natasha.xml in Resources */,
84566D961FD1FC1800103322 /* allthis-partial.json in Resources */, 84566D961FD1FC1800103322 /* allthis-partial.json in Resources */,
849A03D01F0081EA00122600 /* DaringFireball.html in Resources */, 849A03D01F0081EA00122600 /* DaringFireball.html in Resources */,
84566D941FD0ABFB00103322 /* allthis.json in Resources */, 84566D941FD0ABFB00103322 /* allthis.json in Resources */,

View File

@ -85,6 +85,13 @@ class FeedParserTypeTests: XCTestCase {
XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
} }
// Checks feed-type detection for the natasha.xml fixture.
// NOTE(review): natasha.xml is presumably a capture of https://www.natashatherobot.com/feed/,
// which (as of 7 Dec. 2017) lacked an opening <rss> tag; it must still be detected as RSS.
func testNatashaTheRobotRSSType() {
	let natashaData = parserData("natasha", "xml", "https://www.natashatherobot.com/")
	XCTAssertTrue(feedType(natashaData) == .rss)
}
// MARK: Atom // MARK: Atom
func testDaringFireballAtomType() { func testDaringFireballAtomType() {

View File

@ -47,4 +47,11 @@ class RSSParserTests: XCTestCase {
} }
} }
// Parses the natasha.xml fixture and checks that the parser yields 10 items.
// Parse problems are reported with XCTFail instead of `try!` / force-unwrap, so a
// thrown error or a nil result fails only this test rather than crashing the whole test run.
func testNatashaTheRobot() {
	let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
	do {
		// FeedParser.parse both throws and returns an optional; handle each case explicitly.
		guard let parsedFeed = try FeedParser.parse(d) else {
			XCTFail("FeedParser.parse returned nil for natasha.xml")
			return
		}
		XCTAssertEqual(parsedFeed.items.count, 10)
	} catch {
		XCTFail("FeedParser.parse threw for natasha.xml: \(error)")
	}
}
} }

File diff suppressed because it is too large Load Diff

View File

@ -60,7 +60,12 @@ static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes);
return NO; return NO;
} }
return didFindString("<rss", self.bytes, self.length) || didFindString("<rdf:RDF", self.bytes, self.length); if (didFindString("<rss", self.bytes, self.length) || didFindString("<rdf:RDF", self.bytes, self.length)) {
return YES;
}
// At this writing (7 Dec. 2017), https://www.natashatherobot.com/feed/ is missing an opening <rss> tag, but it should be parsed anyway. It does have some other distinct RSS markers we can find.
return (didFindString("<channel>", self.bytes, self.length) && didFindString("<pubDate>", self.bytes, self.length));
} }
- (BOOL)isProbablyAtom { - (BOOL)isProbablyAtom {