Start porting FeedType to Swift.

This commit is contained in:
Brent Simmons 2024-09-11 21:53:58 -07:00
parent 6d798ee167
commit 860ecfd58c
2 changed files with 156 additions and 129 deletions

View File

@ -10,53 +10,81 @@ import Foundation
import SAX
/// The kind of feed that a chunk of downloaded data appears to contain.
public enum FeedType: Sendable {

    case rss
    case atom
    case jsonFeed
    case rssInJSON
    /// Not enough information to decide yet — ask again when more data is available.
    case unknown
    /// The data is definitely not a feed.
    case notAFeed

    /// Inputs shorter than this many bytes are not inspected at all.
    private static let minNumberOfBytesRequired = 128

    /// Makes a best guess at the feed type of `data`.
    ///
    /// This can be called with partial data (while still downloading, for instance).
    /// If there's not enough data, the result is `.unknown`; call again when there's more.
    ///
    /// Only RSS detection has been ported so far: anything that does not look like RSS
    /// currently comes back as `.unknown`, and `isPartialData` is not consulted yet.
    ///
    /// - Parameters:
    ///   - data: The bytes downloaded so far.
    ///   - isPartialData: Pass `true` when `data` may be an incomplete download.
    /// - Returns: `.rss` when the bytes look like RSS, otherwise `.unknown`.
    static func feedType(_ data: Data, isPartialData: Bool = false) -> FeedType {

        let byteCount = data.count
        guard byteCount >= minNumberOfBytesRequired else {
            return .unknown
        }

        return data.withUnsafeBytes { (rawBuffer: UnsafeRawBufferPointer) -> FeedType in
            guard let start = rawBuffer.baseAddress else {
                return .unknown
            }

            // The C-string search helpers take a CChar pointer plus an explicit length.
            let chars = start.assumingMemoryBound(to: CChar.self)
            return isProbablyRSS(chars, byteCount) ? .rss : .unknown
        }

        // Still to be ported:
        //
        //        if d.isProbablyJSONFeed() {
        //            return .jsonFeed
        //        }
        //        if d.isProbablyRSSInJSON() {
        //            return .rssInJSON
        //        }
        //        if d.isProbablyAtom() {
        //            return .atom
        //        }
        //
        //        if isPartialData && d.isProbablyJSON() {
        //            // Might not be able to detect a JSON Feed without all data.
        //            // Dr. Drang's JSON Feed (see allthis.json and allthis-partial.json in tests)
        //            // has, at this writing, the JSON version element at the end of the feed,
        //            // which is totally legal — but it means not being able to detect
        //            // that it's a JSON Feed without all the data.
        //            // So this returns .unknown instead of .notAFeed.
        //            return .unknown
        //        }
        //        return .notAFeed
        //        return type
    }
}
// MARK: - Byte-level detection helpers

private extension FeedType {

    // Earlier NSData-based implementation, kept as a reference while the Swift port is in progress:
    //
    //private let minNumberOfBytesRequired = 128
    //
    //public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType {
    //
    //    // Can call with partial data — while still downloading, for instance.
    //    // If there's not enough data, return .unknown. Ask again when there's more data.
    //    // If it's definitely not a feed, return .notAFeed.
    //    //
    //    // This is fast enough to call on the main thread.
    //
    //    if parserData.data.count < minNumberOfBytesRequired {
    //        return .unknown
    //    }
    //
    //    let nsdata = parserData.data as NSData
    //
    //    if nsdata.isProbablyJSONFeed() {
    //        return .jsonFeed
    //    }
    //    if nsdata.isProbablyRSSInJSON() {
    //        return .rssInJSON
    //    }
    //    if nsdata.isProbablyRSS() {
    //        return .rss
    //    }
    //    if nsdata.isProbablyAtom() {
    //        return .atom
    //    }
    //
    //    if isPartialData && nsdata.isProbablyJSON() {
    //        // Might not be able to detect a JSON Feed without all data.
    //        // Dr. Drang's JSON Feed (see allthis.json and allthis-partial.json in tests)
    //        // has, at this writing, the JSON version element at the end of the feed,
    //        // which is totally legal — but it means not being able to detect
    //        // that it's a JSON Feed without all the data.
    //        // So this returns .unknown instead of .notAFeed.
    //        return .unknown
    //    }
    //
    //    return .notAFeed
    //}

    /// Reports whether the buffer looks like an RSS (or RDF) feed.
    ///
    /// A buffer qualifies when it contains `<rss` or `<rdf:RDF`, or, failing that,
    /// when it contains both `<channel>` and `<pubDate>`.
    ///
    /// - Parameters:
    ///   - bytes: Start of the buffer to inspect.
    ///   - count: Number of bytes available at `bytes`.
    static func isProbablyRSS(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
        let hasRootElement = didFindString("<rss", bytes, count) || didFindString("<rdf:RDF", bytes, count)
        guard !hasRootElement else {
            return true
        }

        // No recognizable root element: fall back to requiring both of these tags.
        guard didFindString("<channel>", bytes, count) else {
            return false
        }
        return didFindString("<pubDate>", bytes, count)
    }

    /// Reports whether the C string `string` occurs within the first `numberOfBytes` bytes of `bytes`.
    ///
    /// The search is delegated to `strnstr`, which (per its man page) also stops at the
    /// first NUL byte it encounters in `bytes`.
    static func didFindString(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {
        strnstr(bytes, string, numberOfBytes) != nil
    }
}

View File

@ -7,13 +7,13 @@
//
import XCTest
import FeedParser
@testable import FeedParser
import SAX
//class FeedParserTypeTests: XCTestCase {
//
// // MARK: HTML
//
class FeedParserTypeTests: XCTestCase {
// MARK: HTML
// func testDaringFireballHTMLType() {
//
// let d = parserData("DaringFireball", "html", "http://daringfireball.net/")
@ -41,79 +41,79 @@ import SAX
// let type = feedType(d)
// XCTAssertTrue(type == .notAFeed)
// }
//
// // MARK: RSS
//
// func testEMarleyRSSType() {
//
// let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testScriptingNewsRSSType() {
//
// let d = parserData("scriptingNews", "rss", "http://scripting.com/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testKatieFloydRSSType() {
//
// let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testMantonRSSType() {
//
// let d = parserData("manton", "rss", "http://manton.org/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testDCRainmakerRSSType() {
//
// let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testMacworldRSSType() {
//
// let d = parserData("macworld", "rss", "https://www.macworld.com/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testNatashaTheRobotRSSType() {
//
// let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testDontHitSaveRSSWithBOMType() {
//
// let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testBioRDF() {
// let d = parserData("bio", "rdf", "http://connect.biorxiv.org/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// func testPHPXML() {
// let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/")
// let type = feedType(d)
// XCTAssertTrue(type == .rss)
// }
//
// // MARK: Atom
//
// MARK: RSS
/// The EMarley (Medium) fixture should be detected as RSS.
func testEMarleyRSSType() {
    let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The Scripting News fixture should be detected as RSS.
func testScriptingNewsRSSType() {
    let d = parserData("scriptingNews", "rss", "http://scripting.com/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The Katie Floyd fixture should be detected as RSS.
func testKatieFloydRSSType() {
    let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The manton.org fixture should be detected as RSS.
func testMantonRSSType() {
    let d = parserData("manton", "rss", "http://manton.org/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The DC Rainmaker fixture (stored with an .xml extension) should be detected as RSS.
func testDCRainmakerRSSType() {
    let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The Macworld fixture should be detected as RSS.
func testMacworldRSSType() {
    let d = parserData("macworld", "rss", "https://www.macworld.com/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The Natasha The Robot fixture (stored with an .xml extension) should be detected as RSS.
func testNatashaTheRobotRSSType() {
    let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The Don't Hit Save fixture (per the test name, an RSS file with a BOM) should be detected as RSS.
func testDontHitSaveRSSWithBOMType() {
    let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The bioRxiv RDF fixture should be classified as `.rss`.
func testBioRDF() {
    let d = parserData("bio", "rdf", "http://connect.biorxiv.org/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
/// The phpxml fixture should be detected as RSS.
func testPHPXML() {
    let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/")
    let type = FeedType.feedType(d.data)
    // XCTAssertEqual reports the detected type on failure, unlike XCTAssertTrue(type == .rss).
    XCTAssertEqual(type, .rss)
}
// MARK: Atom
// func testDaringFireballAtomType() {
//
// // File extension is .rss, but it's really an Atom feed.
@ -180,9 +180,9 @@ import SAX
// let type = feedType(d)
// XCTAssertTrue(type == .jsonFeed)
// }
//
// // MARK: Unknown
//
// MARK: Unknown
// func testPartialAllThisUnknownFeedType() {
//
// // In the case of this feed, the partial data isn't enough to detect that it's a JSON Feed.
@ -192,9 +192,9 @@ import SAX
// let type = feedType(d, isPartialData: true)
// XCTAssertEqual(type, .unknown)
// }
//
// // MARK: Performance
//
// MARK: Performance
// func testFeedTypePerformance() {
//
// // 0.000 on my 2012 iMac.
@ -204,7 +204,7 @@ import SAX
// let _ = feedType(d)
// }
// }
//
// func testFeedTypePerformance2() {
//
// // 0.000 on my 2012 iMac.
@ -214,7 +214,7 @@ import SAX
// let _ = feedType(d)
// }
// }
//
// func testFeedTypePerformance3() {
//
// // 0.000 on my 2012 iMac.
@ -234,8 +234,7 @@ import SAX
// let _ = feedType(d)
// }
// }
//
//}
}
func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData {
let filename = "Resources/\(filename)"