Add isProbablyJSON method to FeedType detector.

This commit is contained in:
Brent Simmons 2024-09-12 21:47:59 -07:00
parent 1159d45e5f
commit 0ffb878d9c
2 changed files with 64 additions and 36 deletions

View File

@ -26,12 +26,11 @@ public enum FeedType: Sendable {
// If there's not enough data, return .unknown. Ask again when there's more data.
// If it's definitely not a feed, return .notAFeed.
if data.count < minNumberOfBytesRequired {
let count = data.count
if count < minNumberOfBytesRequired {
return .unknown
}
let count = data.count
return data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in
guard let baseAddress = pointer.baseAddress else {
@ -45,32 +44,18 @@ public enum FeedType: Sendable {
if isProbablyAtom(cCharPointer, count) {
return .atom
}
if isPartialData && isProbablyJSON(cCharPointer, count) {
// Might not be able to detect a JSON Feed without all data.
// Dr. Drang's JSON Feed (see allthis.json and allthis-partial.json in tests)
// has, at this writing, the JSON version element at the end of the feed,
// which is totally legal, but it means not being able to detect
// that it's a JSON Feed without all the data.
// So this returns .unknown instead of .notAFeed.
return .unknown
}
return .unknown
return .notAFeed
}
// if d.isProbablyJSONFeed() {
// return .jsonFeed
// }
// if d.isProbablyRSSInJSON() {
// return .rssInJSON
// }
// if d.isProbablyAtom() {
// return .atom
// }
//
// if isPartialData && d.isProbablyJSON() {
// // Might not be able to detect a JSON Feed without all data.
// // Dr. Drangs JSON Feed (see althis.json and allthis-partial.json in tests)
// // has, at this writing, the JSON version element at the end of the feed,
// // which is totally legal but it means not being able to detect
// // that its a JSON Feed without all the data.
// // So this returns .unknown instead of .notAFeed.
// return .unknown
// }
// return .notAFeed
// return type
}
}
@ -90,9 +75,52 @@ private extension FeedType {
didFindString("<feed", bytes, count)
}
/// Reports whether the buffer looks like JSON, i.e. whether it starts with `{`
/// according to `bytesStartWithStringIgnoringWhitespace`.
///
/// - Parameters:
///   - bytes: Pointer to the data being inspected.
///   - count: Number of bytes available at `bytes`.
/// - Returns: The result of the prefix check for `"{"`.
static func isProbablyJSON(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
	return bytesStartWithStringIgnoringWhitespace("{", bytes, count)
}
/// Reports whether the C string `string` occurs within the first
/// `numberOfBytes` bytes of `bytes`.
///
/// - Returns: `true` when `strnstr` locates an occurrence, otherwise `false`.
static func didFindString(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {
	strnstr(bytes, string, numberOfBytes) != nil
}
/// Byte values of the ASCII whitespace characters: space, carriage return,
/// newline, and tab.
///
/// Built with `UInt8(ascii:)`, which is non-optional, so no force-unwrapping
/// is needed.
struct Whitespace {
	static let space = UInt8(ascii: " ")
	static let `return` = UInt8(ascii: "\r")
	static let newline = UInt8(ascii: "\n")
	static let tab = UInt8(ascii: "\t")
}
/// Reports whether `string` begins at the first significant byte of `bytes`.
///
/// Leading spaces, carriage returns, newlines, and tabs are skipped. In addition,
/// up to four leading bytes that are neither whitespace nor the first byte of
/// `string` are tolerated, on the assumption that they are a byte-order mark.
/// The first byte that equals `string[0]` decides the result.
///
/// - Parameters:
///   - string: NUL-terminated C string to look for. An empty string never matches.
///   - bytes: Buffer to inspect; only the first `numberOfBytes` bytes are read.
///   - numberOfBytes: Number of readable bytes at `bytes`.
/// - Returns: `true` if `string` starts at the first significant byte, otherwise `false`.
static func bytesStartWithStringIgnoringWhitespace(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {

	let stringLength = strlen(string)
	guard stringLength > 0 else {
		return false
	}

	var i = 0

	while i < numberOfBytes {

		let ch = bytes[i]

		if ch == Whitespace.space || ch == Whitespace.return || ch == Whitespace.newline || ch == Whitespace.tab {
			i += 1
			continue
		}

		if ch == string[0] {
			// Compare in place instead of searching the whole buffer: only the
			// bytes at this position matter, and the length check keeps the
			// comparison inside the readable range.
			let remainingBytes = numberOfBytes - i
			return stringLength <= remainingBytes && strncmp(bytes + i, string, stringLength) == 0
		}

		// Allow for a BOM of up to four bytes (assuming BOM is only at the start)
		if i < 4 {
			i += 1
			continue
		}

		break
	}

	return false
}
}

View File

@ -183,15 +183,15 @@ class FeedParserTypeTests: XCTestCase {
// MARK: Unknown
// func testPartialAllThisUnknownFeedType() {
//
// // In the case of this feed, the partial data isnt enough to detect that its a JSON Feed.
// // The type detector should return .unknown rather than .notAFeed.
//
// let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/")
// let type = feedType(d, isPartialData: true)
// XCTAssertEqual(type, .unknown)
// }
func testPartialAllThisUnknownFeedType() {
	// The partial data for this feed isn't enough to detect that it's a JSON Feed,
	// so the type detector should return .unknown rather than .notAFeed.
	let partialFeed = parserData("allthis-partial", "json", "http://leancrew.com/allthis/")
	let detectedType = FeedType.feedType(partialFeed.data, isPartialData: true)
	XCTAssertEqual(detectedType, .unknown)
}
// MARK: Performance