From 860ecfd58c084995421324b11b42d3b6ce24b17b Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Wed, 11 Sep 2024 21:53:58 -0700 Subject: [PATCH] Start porting FeedType to Swift. --- .../Sources/FeedParser/Feeds/FeedType.swift | 110 +++++++---- .../FeedParserTests/FeedParserTypeTests.swift | 175 +++++++++--------- 2 files changed, 156 insertions(+), 129 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift index f934c8920..4e4bf9960 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift @@ -10,53 +10,81 @@ import Foundation import SAX public enum FeedType: Sendable { + case rss case atom case jsonFeed case rssInJSON case unknown case notAFeed + + private static let minNumberOfBytesRequired = 128 + + static func feedType(_ data: Data, isPartialData: Bool = false) -> FeedType { + + // Can call with partial data — while still downloading, for instance. + // If there’s not enough data, return .unknown. Ask again when there’s more data. + // If it’s definitely not a feed, return .notAFeed. + + if data.count < minNumberOfBytesRequired { + return .unknown + } + + let count = data.count + + return data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in + + guard let baseAddress = pointer.baseAddress else { + return .unknown + } + let cCharPointer = baseAddress.assumingMemoryBound(to: CChar.self) + + if isProbablyRSS(cCharPointer, count) { + return .rss + } + + return .unknown + } +// if d.isProbablyJSONFeed() { +// return .jsonFeed +// } +// if d.isProbablyRSSInJSON() { +// return .rssInJSON +// } +// if d.isProbablyAtom() { +// return .atom +// } +// +// if isPartialData && d.isProbablyJSON() { +// // Might not be able to detect a JSON Feed without all data. +// // Dr. 
Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) +// // has, at this writing, the JSON version element at the end of the feed, +// // which is totally legal — but it means not being able to detect +// // that it’s a JSON Feed without all the data. +// // So this returns .unknown instead of .notAFeed. +// return .unknown +// } + +// return .notAFeed + +// return type + } } +private extension FeedType { -//private let minNumberOfBytesRequired = 128 -// -//public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType { -// -// // Can call with partial data — while still downloading, for instance. -// // If there’s not enough data, return .unknown. Ask again when there’s more data. -// // If it’s definitely not a feed, return .notAFeed. -// // -// // This is fast enough to call on the main thread. -// -// if parserData.data.count < minNumberOfBytesRequired { -// return .unknown -// } -// -// let nsdata = parserData.data as NSData -// -// if nsdata.isProbablyJSONFeed() { -// return .jsonFeed -// } -// if nsdata.isProbablyRSSInJSON() { -// return .rssInJSON -// } -// if nsdata.isProbablyRSS() { -// return .rss -// } -// if nsdata.isProbablyAtom() { -// return .atom -// } -// -// if isPartialData && nsdata.isProbablyJSON() { -// // Might not be able to detect a JSON Feed without all data. -// // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) -// // has, at this writing, the JSON version element at the end of the feed, -// // which is totally legal — but it means not being able to detect -// // that it’s a JSON Feed without all the data. -// // So this returns .unknown instead of .notAFeed. 
-// return .unknown -// } -// -// return .notAFeed -//} + static func isProbablyRSS(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool { + + if didFindString("<rss", bytes, count) || didFindString("<rdf:RDF", bytes, count) { + return true + } + + return didFindString("<channel>", bytes, count) && didFindString("<pubDate>", bytes, count) + } + + static func didFindString(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool { + + let foundString = strnstr(bytes, string, numberOfBytes) + return foundString != nil + } +} diff --git a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift index 5028ddef5..dc28935ab 100644 --- a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift @@ -7,13 +7,13 @@ // import XCTest -import FeedParser +@testable import FeedParser import SAX -//class FeedParserTypeTests: XCTestCase { -// -// // MARK: HTML -// +class FeedParserTypeTests: XCTestCase { + + // MARK: HTML + // func testDaringFireballHTMLType() { // // let d = parserData("DaringFireball", "html", "http://daringfireball.net/") @@ -41,79 +41,79 @@ import SAX // let type = feedType(d) // XCTAssertTrue(type == .notAFeed) // } -// -// // MARK: RSS -// -// func testEMarleyRSSType() { -// -// let d = parserData("EMarley", "rss", "https://medium.com/@emarley") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testScriptingNewsRSSType() { -// -// let d = parserData("scriptingNews", "rss", "http://scripting.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testKatieFloydRSSType() { -// -// let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testMantonRSSType() { -// -// let d = parserData("manton", "rss", "http://manton.org/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testDCRainmakerRSSType() { -// -// let d = parserData("dcrainmaker", "xml", 
"https://www.dcrainmaker.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testMacworldRSSType() { -// -// let d = parserData("macworld", "rss", "https://www.macworld.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testNatashaTheRobotRSSType() { -// -// let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testDontHitSaveRSSWithBOMType() { -// -// let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testBioRDF() { -// let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testPHPXML() { -// let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// // MARK: Atom -// + + // MARK: RSS + + func testEMarleyRSSType() { + + let d = parserData("EMarley", "rss", "https://medium.com/@emarley") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testScriptingNewsRSSType() { + + let d = parserData("scriptingNews", "rss", "http://scripting.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testKatieFloydRSSType() { + + let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testMantonRSSType() { + + let d = parserData("manton", "rss", "http://manton.org/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testDCRainmakerRSSType() { + + let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testMacworldRSSType() { + + let d = parserData("macworld", 
"rss", "https://www.macworld.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testNatashaTheRobotRSSType() { + + let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testDontHitSaveRSSWithBOMType() { + + let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testBioRDF() { + let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testPHPXML() { + let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + // MARK: Atom + // func testDaringFireballAtomType() { // // // File extension is .rss, but it’s really an Atom feed. @@ -180,9 +180,9 @@ import SAX // let type = feedType(d) // XCTAssertTrue(type == .jsonFeed) // } -// -// // MARK: Unknown -// + + // MARK: Unknown + // func testPartialAllThisUnknownFeedType() { // // // In the case of this feed, the partial data isn’t enough to detect that it’s a JSON Feed. @@ -192,9 +192,9 @@ import SAX // let type = feedType(d, isPartialData: true) // XCTAssertEqual(type, .unknown) // } -// -// // MARK: Performance -// + + // MARK: Performance + // func testFeedTypePerformance() { // // // 0.000 on my 2012 iMac. @@ -204,7 +204,7 @@ import SAX // let _ = feedType(d) // } // } -// + // func testFeedTypePerformance2() { // // // 0.000 on my 2012 iMac. @@ -214,7 +214,7 @@ import SAX // let _ = feedType(d) // } // } -// + // func testFeedTypePerformance3() { // // // 0.000 on my 2012 iMac. @@ -234,8 +234,7 @@ import SAX // let _ = feedType(d) // } // } -// -//} +} func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { let filename = "Resources/\(filename)"