Start porting FeedType to Swift.

This commit is contained in:
Brent Simmons 2024-09-11 21:53:58 -07:00
parent 6d798ee167
commit 860ecfd58c
2 changed files with 156 additions and 129 deletions

View File

@ -10,45 +10,52 @@ import Foundation
import SAX import SAX
public enum FeedType: Sendable { public enum FeedType: Sendable {
case rss case rss
case atom case atom
case jsonFeed case jsonFeed
case rssInJSON case rssInJSON
case unknown case unknown
case notAFeed case notAFeed
private static let minNumberOfBytesRequired = 128
static func feedType(_ data: Data, isPartialData: Bool = false) -> FeedType {
// Can call with partial data while still downloading, for instance.
// If there's not enough data, return .unknown. Ask again when there's more data.
// If it's definitely not a feed, return .notAFeed.
if data.count < minNumberOfBytesRequired {
return .unknown
} }
let count = data.count
//private let minNumberOfBytesRequired = 128 return data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in
//
//public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType { guard let baseAddress = pointer.baseAddress else {
// return .unknown
// // Can call with partial data while still downloading, for instance. }
// // If theres not enough data, return .unknown. Ask again when theres more data. let cCharPointer = baseAddress.assumingMemoryBound(to: CChar.self)
// // If its definitely not a feed, return .notAFeed.
// // if isProbablyRSS(cCharPointer, count) {
// // This is fast enough to call on the main thread. return .rss
// }
// if parserData.data.count < minNumberOfBytesRequired {
// return .unknown return .unknown
// } }
// // if d.isProbablyJSONFeed() {
// let nsdata = parserData.data as NSData
//
// if nsdata.isProbablyJSONFeed() {
// return .jsonFeed // return .jsonFeed
// } // }
// if nsdata.isProbablyRSSInJSON() { // if d.isProbablyRSSInJSON() {
// return .rssInJSON // return .rssInJSON
// } // }
// if nsdata.isProbablyRSS() { // if d.isProbablyAtom() {
// return .rss
// }
// if nsdata.isProbablyAtom() {
// return .atom // return .atom
// } // }
// //
// if isPartialData && nsdata.isProbablyJSON() { // if isPartialData && d.isProbablyJSON() {
// // Might not be able to detect a JSON Feed without all data. // // Might not be able to detect a JSON Feed without all data.
// // Dr. Drang's JSON Feed (see allthis.json and allthis-partial.json in tests) // // Dr. Drang's JSON Feed (see allthis.json and allthis-partial.json in tests)
// // has, at this writing, the JSON version element at the end of the feed, // // has, at this writing, the JSON version element at the end of the feed,
@ -57,6 +64,27 @@ public enum FeedType: Sendable {
// // So this returns .unknown instead of .notAFeed. // // So this returns .unknown instead of .notAFeed.
// return .unknown // return .unknown
// } // }
//
// return .notAFeed // return .notAFeed
//}
// return type
}
}
private extension FeedType {

	/// Heuristic check for RSS content in a raw byte buffer.
	///
	/// The buffer is considered RSS when it contains `<rss` or `<rdf:RDF`,
	/// or when it contains both `<channel>` and `<pubDate>`.
	///
	/// - Parameters:
	///   - bytes: Pointer to the first byte of the data to inspect.
	///   - count: Maximum number of bytes to search, starting at `bytes`.
	/// - Returns: `true` if one of the marker combinations above was found.
	static func isProbablyRSS(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
		// Small local wrapper so each marker check reads as a single word.
		func contains(_ marker: String) -> Bool {
			return didFindString(marker, bytes, count)
		}

		// Either root-element marker is sufficient on its own.
		if contains("<rss") {
			return true
		}
		if contains("<rdf:RDF") {
			return true
		}

		// Otherwise both of these markers have to be present.
		guard contains("<channel>") else {
			return false
		}
		return contains("<pubDate>")
	}

	/// Reports whether the C string `string` occurs within the first
	/// `numberOfBytes` bytes of `bytes`.
	///
	/// The search is delegated to `strnstr`, which per its documentation
	/// also stops at the first NUL byte in `bytes`.
	static func didFindString(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {
		guard strnstr(bytes, string, numberOfBytes) != nil else {
			return false
		}
		return true
	}
}

View File

@ -7,13 +7,13 @@
// //
import XCTest import XCTest
import FeedParser @testable import FeedParser
import SAX import SAX
//class FeedParserTypeTests: XCTestCase { class FeedParserTypeTests: XCTestCase {
//
// // MARK: HTML // MARK: HTML
//
// func testDaringFireballHTMLType() { // func testDaringFireballHTMLType() {
// //
// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") // let d = parserData("DaringFireball", "html", "http://daringfireball.net/")
@ -41,79 +41,79 @@ import SAX
// let type = feedType(d) // let type = feedType(d)
// XCTAssertTrue(type == .notAFeed) // XCTAssertTrue(type == .notAFeed)
// } // }
//
// // MARK: RSS // MARK: RSS
//
// func testEMarleyRSSType() { func testEMarleyRSSType() {
//
// let d = parserData("EMarley", "rss", "https://medium.com/@emarley") let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testScriptingNewsRSSType() { func testScriptingNewsRSSType() {
//
// let d = parserData("scriptingNews", "rss", "http://scripting.com/") let d = parserData("scriptingNews", "rss", "http://scripting.com/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testKatieFloydRSSType() { func testKatieFloydRSSType() {
//
// let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testMantonRSSType() { func testMantonRSSType() {
//
// let d = parserData("manton", "rss", "http://manton.org/") let d = parserData("manton", "rss", "http://manton.org/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testDCRainmakerRSSType() { func testDCRainmakerRSSType() {
//
// let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/") let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testMacworldRSSType() { func testMacworldRSSType() {
//
// let d = parserData("macworld", "rss", "https://www.macworld.com/") let d = parserData("macworld", "rss", "https://www.macworld.com/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testNatashaTheRobotRSSType() { func testNatashaTheRobotRSSType() {
//
// let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testDontHitSaveRSSWithBOMType() { func testDontHitSaveRSSWithBOMType() {
//
// let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testBioRDF() { func testBioRDF() {
// let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") let d = parserData("bio", "rdf", "http://connect.biorxiv.org/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// func testPHPXML() { func testPHPXML() {
// let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/")
// let type = feedType(d) let type = FeedType.feedType(d.data)
// XCTAssertTrue(type == .rss) XCTAssertTrue(type == .rss)
// } }
//
// // MARK: Atom // MARK: Atom
//
// func testDaringFireballAtomType() { // func testDaringFireballAtomType() {
// //
// // File extension is .rss, but it's really an Atom feed. // // File extension is .rss, but it's really an Atom feed.
@ -180,9 +180,9 @@ import SAX
// let type = feedType(d) // let type = feedType(d)
// XCTAssertTrue(type == .jsonFeed) // XCTAssertTrue(type == .jsonFeed)
// } // }
//
// // MARK: Unknown // MARK: Unknown
//
// func testPartialAllThisUnknownFeedType() { // func testPartialAllThisUnknownFeedType() {
// //
// // In the case of this feed, the partial data isn't enough to detect that it's a JSON Feed. // // In the case of this feed, the partial data isn't enough to detect that it's a JSON Feed.
@ -192,9 +192,9 @@ import SAX
// let type = feedType(d, isPartialData: true) // let type = feedType(d, isPartialData: true)
// XCTAssertEqual(type, .unknown) // XCTAssertEqual(type, .unknown)
// } // }
//
// // MARK: Performance // MARK: Performance
//
// func testFeedTypePerformance() { // func testFeedTypePerformance() {
// //
// // 0.000 on my 2012 iMac. // // 0.000 on my 2012 iMac.
@ -204,7 +204,7 @@ import SAX
// let _ = feedType(d) // let _ = feedType(d)
// } // }
// } // }
//
// func testFeedTypePerformance2() { // func testFeedTypePerformance2() {
// //
// // 0.000 on my 2012 iMac. // // 0.000 on my 2012 iMac.
@ -214,7 +214,7 @@ import SAX
// let _ = feedType(d) // let _ = feedType(d)
// } // }
// } // }
//
// func testFeedTypePerformance3() { // func testFeedTypePerformance3() {
// //
// // 0.000 on my 2012 iMac. // // 0.000 on my 2012 iMac.
@ -234,8 +234,7 @@ import SAX
// let _ = feedType(d) // let _ = feedType(d)
// } // }
// } // }
// }
//}
func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData {
let filename = "Resources/\(filename)" let filename = "Resources/\(filename)"