Avoid skipping the parse of Dr. Drang’s JSON Feed, which can’t be detected as a JSON Feed from partial data alone because its version element is at the end of the feed. Add a test for this.

This commit is contained in:
Brent Simmons 2017-12-01 13:19:30 -08:00
parent efcc4fedaa
commit f28d921aff
6 changed files with 42 additions and 3 deletions

View File

@ -102,7 +102,7 @@ extension LocalAccountRefresher: DownloadSessionDelegate {
if data.count > 4096 {
let parserData = ParserData(url: feed.url, data: data)
return FeedParser.canParse(parserData)
return FeedParser.mightBeAbleToParseBasedOnPartialData(parserData)
}
return true

View File

@ -18,7 +18,7 @@ public struct FeedParser {
public static func canParse(_ parserData: ParserData) -> Bool {
let type = feedType(parserData)
switch type {
case .jsonFeed, .rssInJSON, .rss, .atom:
return true
@ -27,6 +27,18 @@ public struct FeedParser {
}
}
/// Returns whether `parserData`, treated as partial data, could still turn out to be a parseable feed.
///
/// Runs `feedType` with `isPartialData: true`. Unlike `canParse`, an `.unknown`
/// result counts as a "maybe" here and yields `true`.
///
/// - Parameter parserData: The data received so far, passed straight to `feedType`.
/// - Returns: `true` for `.jsonFeed`, `.rssInJSON`, `.rss`, `.atom`, or `.unknown`; `false` for any other type.
public static func mightBeAbleToParseBasedOnPartialData(_ parserData: ParserData) -> Bool {
    switch feedType(parserData, isPartialData: true) {
    case .unknown, .atom, .rss, .rssInJSON, .jsonFeed:
        // Either a recognized feed, or not enough information yet to rule one out.
        return true
    default:
        return false
    }
}
public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
// This is generally fast enough to call on the main thread 

View File

@ -20,7 +20,7 @@ public enum FeedType {
private let minNumberOfBytesRequired = 128
public func feedType(_ parserData: ParserData) -> FeedType {
public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType {
// Can call with partial data while still downloading, for instance.
// If there's not enough data, return .unknown. Ask again when there's more data.
@ -47,5 +47,15 @@ public func feedType(_ parserData: ParserData) -> FeedType {
return .atom
}
if isPartialData && nsdata.isProbablyJSON() {
// Might not be able to detect a JSON Feed without all data.
// Dr. Drang's JSON Feed (see allthis.json and allthis-partial.json in tests)
// has, at this writing, the JSON version element at the end of the feed,
// which is totally legal — but it means not being able to detect
// that it's a JSON Feed without all the data.
// So this returns .unknown instead of .notAFeed.
return .unknown
}
return .notAFeed
}

View File

@ -64,6 +64,7 @@
845213281FCB4042003B6E93 /* RSHTMLTag.h in Headers */ = {isa = PBXBuildFile; fileRef = 845213261FCB4042003B6E93 /* RSHTMLTag.h */; settings = {ATTRIBUTES = (Public, ); }; };
845213291FCB4042003B6E93 /* RSHTMLTag.m in Sources */ = {isa = PBXBuildFile; fileRef = 845213271FCB4042003B6E93 /* RSHTMLTag.m */; };
84566D941FD0ABFB00103322 /* allthis.json in Resources */ = {isa = PBXBuildFile; fileRef = 84566D931FD0ABFB00103322 /* allthis.json */; };
84566D961FD1FC1800103322 /* allthis-partial.json in Resources */ = {isa = PBXBuildFile; fileRef = 84566D951FD1FC1800103322 /* allthis-partial.json */; };
84628AAD1FCA10AE00566A9B /* allthis.atom in Resources */ = {isa = PBXBuildFile; fileRef = 84628AAC1FCA10AE00566A9B /* allthis.atom */; };
848674D21FCE7BF600802D1F /* macworld.rss in Resources */ = {isa = PBXBuildFile; fileRef = 848674D11FCE7BF500802D1F /* macworld.rss */; };
849A03D01F0081EA00122600 /* DaringFireball.html in Resources */ = {isa = PBXBuildFile; fileRef = 849A03C51F0081EA00122600 /* DaringFireball.html */; };
@ -166,6 +167,7 @@
845213261FCB4042003B6E93 /* RSHTMLTag.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = RSHTMLTag.h; sourceTree = "<group>"; };
845213271FCB4042003B6E93 /* RSHTMLTag.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = RSHTMLTag.m; sourceTree = "<group>"; };
84566D931FD0ABFB00103322 /* allthis.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = allthis.json; sourceTree = "<group>"; };
84566D951FD1FC1800103322 /* allthis-partial.json */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.json; path = "allthis-partial.json"; sourceTree = "<group>"; };
84628AAC1FCA10AE00566A9B /* allthis.atom */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = allthis.atom; sourceTree = "<group>"; };
848674D11FCE7BF500802D1F /* macworld.rss */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = macworld.rss; sourceTree = "<group>"; };
849A03C51F0081EA00122600 /* DaringFireball.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = DaringFireball.html; sourceTree = "<group>"; };
@ -365,6 +367,7 @@
849A03CE1F0081EA00122600 /* sixcolors.html */,
84628AAC1FCA10AE00566A9B /* allthis.atom */,
84566D931FD0ABFB00103322 /* allthis.json */,
84566D951FD1FC1800103322 /* allthis-partial.json */,
849A03CF1F0081EA00122600 /* Subs.opml */,
);
path = Resources;
@ -546,6 +549,7 @@
849A03DA1F0081EA00122600 /* Subs.opml in Resources */,
849A03D61F0081EA00122600 /* manton.rss in Resources */,
849A03D11F0081EA00122600 /* DaringFireball.rss in Resources */,
84566D961FD1FC1800103322 /* allthis-partial.json in Resources */,
849A03D01F0081EA00122600 /* DaringFireball.html in Resources */,
84566D941FD0ABFB00103322 /* allthis.json in Resources */,
84628AAD1FCA10AE00566A9B /* allthis.atom in Resources */,

View File

@ -127,6 +127,18 @@ class FeedParserTypeTests: XCTestCase {
XCTAssertTrue(type == .jsonFeed)
}
// MARK: Unknown
func testPartialAllThisUnknownFeedType() {
    // The truncated fixture doesn't contain enough data to detect that it's a JSON Feed,
    // so type detection in partial-data mode should report .unknown rather than .notAFeed.
    let partialFeedData = parserData("allthis-partial", "json", "http://leancrew.com/allthis/")
    XCTAssertEqual(feedType(partialFeedData, isPartialData: true), .unknown)
}
// MARK: Performance
func testFeedTypePerformance() {

File diff suppressed because one or more lines are too long