Continue work on Swift port of Parser.

This commit is contained in:
Brent Simmons 2024-09-02 12:03:24 -07:00
parent 61825a6d88
commit f86a5f8314
19 changed files with 1430 additions and 127 deletions

View File

@ -49,6 +49,20 @@
ReferencedContainer = "container:">
</BuildableReference>
</BuildActionEntry>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FeedParser"
BuildableName = "FeedParser"
BlueprintName = "FeedParser"
ReferencedContainer = "container:">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
@ -88,6 +102,16 @@
ReferencedContainer = "container:">
</BuildableReference>
</TestableReference>
<TestableReference
skipped = "NO">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "DateParserTests"
BuildableName = "DateParserTests"
BlueprintName = "DateParserTests"
ReferencedContainer = "container:">
</BuildableReference>
</TestableReference>
</Testables>
</TestAction>
<LaunchAction

View File

@ -19,10 +19,14 @@ let package = Package(
.library(
name: "OPMLParser",
type: .dynamic,
targets: ["OPMLParser"])
targets: ["OPMLParser"]),
.library(
name: "DateParser",
type: .dynamic,
targets: ["DateParser"])
],
dependencies: [
.package(path: "../FoundationExtras"),
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
@ -38,7 +42,9 @@ let package = Package(
.target(
name: "FeedParser",
dependencies: [
"SAX"
"SAX",
"FoundationExtras",
"DateParser"
],
swiftSettings: [
.enableExperimentalFeature("StrictConcurrency")
@ -49,6 +55,12 @@ let package = Package(
swiftSettings: [
.enableExperimentalFeature("StrictConcurrency")
]),
.target(
name: "DateParser",
dependencies: [],
swiftSettings: [
.enableExperimentalFeature("StrictConcurrency")
]),
.testTarget(
name: "FeedParserTests",
dependencies: ["FeedParser"],
@ -58,6 +70,9 @@ let package = Package(
name: "OPMLParserTests",
dependencies: ["OPMLParser"],
resources: [.copy("Resources")]),
.testTarget(
name: "DateParserTests",
dependencies: ["DateParser"])
]
)

View File

@ -0,0 +1,516 @@
//
// DateParser.swift
//
//
// Created by Brent Simmons on 8/28/24.
//
import Foundation
public final class DateParser {

    /// The raw bytes of a date string, as handed to the parsing helpers.
    typealias DateBuffer = UnsafeBufferPointer<UInt8>

    // MARK: - Public API

    /// Parses W3C and pubDate dates, as used for feed parsing.
    ///
    /// This is a fast alternative to system APIs for parsing dates.
    ///
    /// - Parameter data: The raw bytes of the date string.
    /// - Returns: The parsed date, or `nil` when `data` is shorter than 6 bytes
    ///   or longer than 150 bytes.
    public static func date(data: Data) -> Date? {

        let numberOfBytes = data.count

        // Make sure it's in a reasonable range for a date string.
        guard (6...150).contains(numberOfBytes) else {
            return nil
        }

        return data.withUnsafeBytes { bytes -> Date? in
            let buffer = bytes.bindMemory(to: UInt8.self)

            // The helpers declare an explicit `numberOfBytes:` label, so it must be spelled out here.
            if dateIsW3CDate(buffer, numberOfBytes: numberOfBytes) {
                return parseW3CDate(buffer, numberOfBytes: numberOfBytes)
            }
            if dateIsPubDate(buffer, numberOfBytes: numberOfBytes) {
                return parsePubDate(buffer, numberOfBytes: numberOfBytes)
            }

            // Fallback, in case our detection fails.
            return parseW3CDate(buffer, numberOfBytes: numberOfBytes)
        }
    }
}
// MARK: - Private
private extension DateParser {
/// ASCII byte values of the characters the date parser compares against.
///
/// `UInt8(ascii:)` yields a non-optional `UInt8` (unlike `Character.asciiValue`,
/// which is `UInt8?`), so the constants can be compared with and assigned to raw bytes directly.
struct DateCharacter {

    static let space = UInt8(ascii: " ")
    static let `return` = UInt8(ascii: "\r")
    static let newline = UInt8(ascii: "\n")
    static let tab = UInt8(ascii: "\t")
    static let hyphen = UInt8(ascii: "-")
    static let comma = UInt8(ascii: ",")
    static let dot = UInt8(ascii: ".")
    static let colon = UInt8(ascii: ":")
    static let plus = UInt8(ascii: "+")
    static let minus = UInt8(ascii: "-")

    static let Z = UInt8(ascii: "Z")
    static let z = UInt8(ascii: "z")

    // Month-name letters.
    static let F = UInt8(ascii: "F")
    static let f = UInt8(ascii: "f")
    static let S = UInt8(ascii: "S")
    static let s = UInt8(ascii: "s")
    static let O = UInt8(ascii: "O")
    static let o = UInt8(ascii: "o")
    static let N = UInt8(ascii: "N")
    static let n = UInt8(ascii: "n")
    static let D = UInt8(ascii: "D")
    static let d = UInt8(ascii: "d")
    static let J = UInt8(ascii: "J")
    static let j = UInt8(ascii: "j")
    static let A = UInt8(ascii: "A")
    static let a = UInt8(ascii: "a")
    static let U = UInt8(ascii: "U")
    static let u = UInt8(ascii: "u")
    static let M = UInt8(ascii: "M")
    static let m = UInt8(ascii: "m")
    static let Y = UInt8(ascii: "Y")
    static let y = UInt8(ascii: "y")
}
/// Calendar months with 1-based raw values (`January` is 1, `December` is 12).
enum Month: Int {
    case January = 1
    case February
    case March
    case April
    case May
    case June
    case July
    case August
    case September
    case October
    case November
    case December
}
// MARK: - Standard Formats
/// Returns `true` when the first non-whitespace bytes are four digits followed by a hyphen.
///
/// - Parameters:
///   - bytes: The raw bytes of the date string.
///   - numberOfBytes: The number of bytes to consider.
static func dateIsW3CDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Bool {

    // Something like 2010-11-17T08:40:07-05:00
    // But might be missing T character in the middle.
    // Looks for four digits in a row followed by a -.

    // Never read past the buffer, and never build the range `0..<negative`, which traps.
    let count = min(numberOfBytes, bytes.count)
    guard count > 4 else {
        return false
    }

    func isASCIIDigit(_ byte: UInt8) -> Bool {
        byte >= 48 && byte <= 57 // "0"..."9"
    }

    for i in 0..<count - 4 {
        let ch = bytes[i]

        // Skip whitespace.
        if ch == DateCharacter.space || ch == DateCharacter.`return` || ch == DateCharacter.newline || ch == DateCharacter.tab {
            continue
        }

        assert(i + 4 < count)
        // First non-whitespace character must be the beginning of the year, as in `2010-`
        return isASCIIDigit(ch)
            && isASCIIDigit(bytes[i + 1])
            && isASCIIDigit(bytes[i + 2])
            && isASCIIDigit(bytes[i + 3])
            && bytes[i + 4] == DateCharacter.hyphen
    }

    return false
}
/// Returns `true` when any byte in `bytes` is a space or a comma.
///
/// `numberOfBytes` is accepted for symmetry with the other detectors but is not read;
/// the whole buffer is scanned.
static func dateIsPubDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Bool {
    bytes.contains { byte in
        byte == DateCharacter.space || byte == DateCharacter.comma
    }
}
/// Parses a W3C-style date by reading its numeric fields in order:
/// year, month, day, hour, minute, second, optional milliseconds, then the time zone.
///
/// Fields that are not found fall back to 1970 for the year, January for the month,
/// 1 for the day, and 0 for everything else.
///
/// - Parameters:
///   - bytes: The raw bytes of the date string.
///   - numberOfBytes: The number of bytes to consider.
static func parseW3CDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Date {

    /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss"
     @"yyyy-MM-dd'T'HH:mm:sszzz"
     @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz"
     etc.*/

    // Index of the last byte the previous field looked at; the next field starts right after it.
    var finalIndex = 0

    let year = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: 0, maximumNumberOfDigits: 4, finalIndex: &finalIndex) ?? 1970
    let month = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? Month.January.rawValue
    let day = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 1
    let hour = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0
    let minute = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0
    let second = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0

    var currentIndex = finalIndex + 1

    // Milliseconds are present only when the seconds are immediately followed by a dot.
    var milliseconds = 0
    let hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.dot)
    if hasMilliseconds {
        milliseconds = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: currentIndex, maximumNumberOfDigits: 3, finalIndex: &finalIndex) ?? 0
        currentIndex = finalIndex + 1
    }

    let timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex)

    return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset)
}
/// Parses a pubDate-style date by reading, in order: day, month name, year,
/// hour, minute, optional seconds, and an optional time zone.
///
/// Fields that are not found fall back to 1 for the day, January for the month,
/// 1970 for the year, and 0 for everything else.
///
/// - Parameters:
///   - bytes: The raw bytes of the date string.
///   - numberOfBytes: The number of bytes to consider.
static func parsePubDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Date {

    // Index of the last byte the previous field looked at; the next field starts right after it.
    var finalIndex = 0

    let day = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: 0, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 1
    let month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex) ?? .January
    let year = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 4, finalIndex: &finalIndex) ?? 1970
    let hour = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0
    let minute = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0

    var currentIndex = finalIndex + 1

    // Seconds are present only when the minutes are immediately followed by a colon.
    var second = 0
    let hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.colon)
    if hasSeconds {
        second = nextNumericValue(bytes, numberOfBytes: numberOfBytes, startingIndex: currentIndex, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0
    }

    currentIndex = finalIndex + 1

    // A time zone is present only when the time is immediately followed by a space.
    var timeZoneOffset = 0
    let hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.space)
    if hasTimeZone {
        timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex)
    }

    return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month.rawValue, day, hour, minute, second, 0, timeZoneOffset)
}
// MARK: - Date Creation
/// Builds a `Date` from broken-down date fields and a time zone offset.
///
/// - Parameters:
///   - year: Full year, such as 2010.
///   - month: 1-based month (January is 1).
///   - day: Day of the month.
///   - hour: Hour in the given time zone.
///   - minute: Minute.
///   - second: Second.
///   - milliseconds: Milliseconds to add to the result.
///   - timeZoneOffset: Offset from GMT, in seconds, of the zone the fields are expressed in.
/// - Returns: The corresponding point in time.
static func dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int, _ timeZoneOffset: Int) -> Date {

    // `tm()` is zero-initialized, so tm_gmtoff stays 0 and tm_zone stays nil.
    var timeInfo = tm()
    timeInfo.tm_sec = CInt(second)
    timeInfo.tm_min = CInt(minute)
    timeInfo.tm_hour = CInt(hour)
    timeInfo.tm_mday = CInt(day)
    timeInfo.tm_mon = CInt(month - 1) // It's 1-based coming in
    timeInfo.tm_year = CInt(year - 1900) // see time.h -- it's years since 1900
    timeInfo.tm_wday = -1
    timeInfo.tm_yday = -1
    timeInfo.tm_isdst = -1

    // timegm treats the fields as UTC and does not consult tm_gmtoff,
    // so the zone offset has to be subtracted from its result explicitly.
    let secondsAsUTC = timegm(&timeInfo)

    if secondsAsUTC == -1 {
        // timegm reports failure as -1. (-1 is also the valid result for
        // 1969-12-31 23:59:59Z; the calendar path below yields the same date then.)
        //
        // NSCalendar is super-amazingly slow (which is partly why this parser exists),
        // so this is used only when the date is far enough in the future
        // (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails.
        // Hopefully by the time we consistently need dates that far in the future
        // the performance of NSCalendar won't be an issue.
        var dateComponents = DateComponents()
        dateComponents.timeZone = TimeZone(secondsFromGMT: timeZoneOffset)
        dateComponents.year = year
        dateComponents.month = month
        dateComponents.day = day
        dateComponents.hour = hour
        dateComponents.minute = minute
        dateComponents.second = second
        dateComponents.nanosecond = milliseconds * 1_000_000

        // The fields are Gregorian; the user's current calendar might not be.
        if let date = Calendar(identifier: .gregorian).date(from: dateComponents) {
            return date
        }
    }

    let secondsSince1970 = TimeInterval(secondsAsUTC - timeZoneOffset) + (TimeInterval(milliseconds) / 1000.0)
    return Date(timeIntervalSince1970: secondsSince1970)
}
// MARK: - Time Zones and Offsets
/// Uppercase ASCII bytes of the zone names that always mean a zero offset.
static let kGMT = Array("GMT".utf8)
static let kUTC = Array("UTC".utf8)

/// Reads a time zone from `bytes`, starting at `startingIndex`, and returns its offset in seconds.
///
/// Colons and spaces are skipped. Up to five letters, digits, `+`, or `-` are collected.
/// Returns 0 when nothing is collected, when the first collected character is `Z`/`z`,
/// when the collected characters contain "GMT" or "UTC" (ignoring case),
/// or when an abbreviation is not in the table.
///
/// - Parameters:
///   - bytes: The raw bytes of the date string.
///   - numberOfBytes: The number of bytes to consider.
///   - startingIndex: Where the time zone is expected to begin.
static func parsedTimeZoneOffset(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int) -> Int {

    // A range whose lower bound exceeds its upper bound traps, so bail out first.
    guard startingIndex >= 0, startingIndex < numberOfBytes else {
        return 0
    }

    func isASCIILetter(_ byte: UInt8) -> Bool {
        (byte >= 65 && byte <= 90) || (byte >= 97 && byte <= 122) // "A"..."Z", "a"..."z"
    }
    func isASCIIDigit(_ byte: UInt8) -> Bool {
        byte >= 48 && byte <= 57 // "0"..."9"
    }

    var timeZoneCharacters: [UInt8] = [0, 0, 0, 0, 0, 0] // NUL-terminated: at most five are filled in
    var numberOfCharactersFound = 0
    var hasAtLeastOneAlphaCharacter = false

    for i in startingIndex..<numberOfBytes {
        let ch = bytes[i]
        if ch == DateCharacter.colon || ch == DateCharacter.space {
            continue
        }
        let isAlphaCharacter = isASCIILetter(ch)
        if isAlphaCharacter {
            hasAtLeastOneAlphaCharacter = true
        }
        if isAlphaCharacter || isASCIIDigit(ch) || ch == DateCharacter.plus || ch == DateCharacter.minus {
            numberOfCharactersFound += 1
            timeZoneCharacters[numberOfCharactersFound - 1] = ch
        }
        if numberOfCharactersFound >= 5 {
            break
        }
    }

    if numberOfCharactersFound < 1 || timeZoneCharacters[0] == DateCharacter.Z || timeZoneCharacters[0] == DateCharacter.z {
        return 0
    }

    // Case-insensitive substring search over the collected characters.
    // `needle` must be uppercase ASCII letters; `& 0xDF` uppercases an ASCII letter.
    func collectedCharactersContain(_ needle: [UInt8]) -> Bool {
        guard numberOfCharactersFound >= needle.count else {
            return false
        }
        for start in 0...(numberOfCharactersFound - needle.count) {
            var isMatch = true
            for offset in 0..<needle.count {
                if (timeZoneCharacters[start + offset] & 0xDF) != needle[offset] {
                    isMatch = false
                    break
                }
            }
            if isMatch {
                return true
            }
        }
        return false
    }

    if collectedCharactersContain(kGMT) || collectedCharactersContain(kUTC) {
        return 0
    }

    // The helpers take a DateBuffer, so lend them a pointer to the collected characters.
    return timeZoneCharacters.withUnsafeBufferPointer { zoneCharacters -> Int in
        if hasAtLeastOneAlphaCharacter {
            return offsetInSecondsForTimeZoneAbbreviation(zoneCharacters) ?? 0
        }
        return offsetInSecondsForOffsetCharacters(zoneCharacters)
    }
}
/// Converts numeric offset characters such as `+0530` or `-0800` to an offset in seconds.
///
/// The first two digits are hours and the next two are minutes. The result is positive only
/// when the first character is `+`; anything else is treated as negative.
///
/// - Parameter timeZoneCharacters: The offset characters; reading stops at the first NUL byte, if any.
/// - Returns: The offset in seconds.
static func offsetInSecondsForOffsetCharacters(_ timeZoneCharacters: DateBuffer) -> Int {

    guard !timeZoneCharacters.isEmpty else {
        return 0
    }

    let isPlus = timeZoneCharacters[0] == DateCharacter.plus

    // Equivalent of strlen: stop at the NUL terminator when there is one.
    let numberOfCharacters = timeZoneCharacters.firstIndex(of: 0) ?? timeZoneCharacters.count

    var finalIndex = 0
    let hours = nextNumericValue(timeZoneCharacters, numberOfBytes: numberOfCharacters, startingIndex: 0, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0
    let minutesStartingIndex = min(finalIndex + 1, numberOfCharacters)
    let minutes = nextNumericValue(timeZoneCharacters, numberOfBytes: numberOfCharacters, startingIndex: minutesStartingIndex, maximumNumberOfDigits: 2, finalIndex: &finalIndex) ?? 0

    let seconds = (hours * 60 * 60) + (minutes * 60)
    return isPlus ? seconds : -seconds
}
/// Converts an hours-and-minutes offset to seconds.
///
/// The minutes take the sign of the hours: `(-9, 30)` means nine and a half hours behind.
static func timeZoneOffset(_ hours: Int, _ minutes: Int) -> Int {
    let secondsFromHours = hours * 60 * 60
    let secondsFromMinutes = minutes * 60
    return hours < 0 ? secondsFromHours - secondsFromMinutes : secondsFromHours + secondsFromMinutes
}
// See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list
//
// Offsets from GMT, in seconds, keyed by time zone abbreviation. Keys are matched exactly.
// This has to be `static`: extensions can't contain stored instance properties,
// and the table is read from a static method.
// NOTE(review): several abbreviations have more than one meaning in practice
// (IST, BST, CST, for instance); this table picks one. Confirm the choices suit the feeds being parsed.
private static let timeZoneTable: [String: Int] = [
    "GMT": timeZoneOffset(0, 0),
    "PDT": timeZoneOffset(-7, 0),
    "PST": timeZoneOffset(-8, 0),
    "EST": timeZoneOffset(-5, 0),
    "EDT": timeZoneOffset(-4, 0),
    "MDT": timeZoneOffset(-6, 0),
    "MST": timeZoneOffset(-7, 0),
    "CST": timeZoneOffset(-6, 0),
    "CDT": timeZoneOffset(-5, 0),
    "ACT": timeZoneOffset(-8, 0),
    "AFT": timeZoneOffset(4, 30),
    "AMT": timeZoneOffset(4, 0),
    "ART": timeZoneOffset(-3, 0),
    "AST": timeZoneOffset(3, 0),
    "AZT": timeZoneOffset(4, 0),
    "BIT": timeZoneOffset(-12, 0),
    "BDT": timeZoneOffset(8, 0),
    "ACST": timeZoneOffset(9, 30),
    "AEST": timeZoneOffset(10, 0),
    "AKST": timeZoneOffset(-9, 0),
    "AMST": timeZoneOffset(5, 0),
    "AWST": timeZoneOffset(8, 0),
    "AZOST": timeZoneOffset(-1, 0),
    "BIOT": timeZoneOffset(6, 0),
    "BRT": timeZoneOffset(-3, 0),
    "BST": timeZoneOffset(6, 0),
    "BTT": timeZoneOffset(6, 0),
    "CAT": timeZoneOffset(2, 0),
    "CCT": timeZoneOffset(6, 30),
    "CET": timeZoneOffset(1, 0),
    "CEST": timeZoneOffset(2, 0),
    "CHAST": timeZoneOffset(12, 45),
    "ChST": timeZoneOffset(10, 0),
    "CIST": timeZoneOffset(-8, 0),
    "CKT": timeZoneOffset(-10, 0),
    "CLT": timeZoneOffset(-4, 0),
    "CLST": timeZoneOffset(-3, 0),
    "COT": timeZoneOffset(-5, 0),
    "COST": timeZoneOffset(-4, 0),
    "CVT": timeZoneOffset(-1, 0),
    "CXT": timeZoneOffset(7, 0),
    "EAST": timeZoneOffset(-6, 0),
    "EAT": timeZoneOffset(3, 0),
    "ECT": timeZoneOffset(-4, 0),
    "EEST": timeZoneOffset(3, 0),
    "EET": timeZoneOffset(2, 0),
    "FJT": timeZoneOffset(12, 0),
    "FKST": timeZoneOffset(-4, 0),
    "GALT": timeZoneOffset(-6, 0),
    "GET": timeZoneOffset(4, 0),
    "GFT": timeZoneOffset(-3, 0),
    "GILT": timeZoneOffset(7, 0),
    "GIT": timeZoneOffset(-9, 0),
    "GST": timeZoneOffset(-2, 0),
    "GYT": timeZoneOffset(-4, 0),
    "HAST": timeZoneOffset(-10, 0),
    "HKT": timeZoneOffset(8, 0),
    "HMT": timeZoneOffset(5, 0),
    "IRKT": timeZoneOffset(8, 0),
    "IRST": timeZoneOffset(3, 30),
    "IST": timeZoneOffset(2, 0),
    "JST": timeZoneOffset(9, 0),
    "KRAT": timeZoneOffset(7, 0),
    "KST": timeZoneOffset(9, 0),
    "LHST": timeZoneOffset(10, 30),
    "LINT": timeZoneOffset(14, 0),
    "MAGT": timeZoneOffset(11, 0),
    "MIT": timeZoneOffset(-9, 30),
    "MSK": timeZoneOffset(3, 0),
    "MUT": timeZoneOffset(4, 0),
    "NDT": timeZoneOffset(-2, 30),
    "NFT": timeZoneOffset(11, 30),
    "NPT": timeZoneOffset(5, 45),
    "NT": timeZoneOffset(-3, 30),
    "OMST": timeZoneOffset(6, 0),
    "PETT": timeZoneOffset(12, 0),
    "PHOT": timeZoneOffset(13, 0),
    "PKT": timeZoneOffset(5, 0),
    "RET": timeZoneOffset(4, 0),
    "SAMT": timeZoneOffset(4, 0),
    "SAST": timeZoneOffset(2, 0),
    "SBT": timeZoneOffset(11, 0),
    "SCT": timeZoneOffset(4, 0),
    "SLT": timeZoneOffset(5, 30),
    "SST": timeZoneOffset(8, 0),
    "TAHT": timeZoneOffset(-10, 0),
    "THA": timeZoneOffset(7, 0),
    "UYT": timeZoneOffset(-3, 0),
    "UYST": timeZoneOffset(-2, 0),
    "VET": timeZoneOffset(-4, 30),
    "VLAT": timeZoneOffset(10, 0),
    "WAT": timeZoneOffset(1, 0),
    "WET": timeZoneOffset(0, 0),
    "WEST": timeZoneOffset(1, 0),
    "YAKT": timeZoneOffset(9, 0),
    "YEKT": timeZoneOffset(5, 0)
]
/// Looks up the offset, in seconds, for a time zone abbreviation such as `EST`.
///
/// - Parameter abbreviation: The abbreviation's bytes; reading stops at the first NUL byte, if any.
/// - Returns: The offset from `timeZoneTable`, or `nil` when the abbreviation isn't listed.
static func offsetInSecondsForTimeZoneAbbreviation(_ abbreviation: DateBuffer) -> Int? {
    // `String(cString:)` wants a NUL-terminated pointer, not a buffer; decode the bytes before the NUL instead.
    let name = String(decoding: abbreviation.prefix(while: { $0 != 0 }), as: UTF8.self)
    return timeZoneTable[name]
}
// MARK: - Parser
/// Finds the next month name at or after `startingIndex` and returns the month it stands for.
///
/// Matching is deliberately loose and case-insensitive: non-letters before the name are skipped,
/// an initial F, S, O, N, or D decides the month immediately, and at most three letters are examined.
///
/// - Parameters:
///   - buffer: The raw bytes of the date string.
///   - numberOfBytes: The number of bytes to consider.
///   - startingIndex: Where to start looking.
///   - finalIndex: Set to the index of the last byte looked at.
/// - Returns: The month, or `nil` when fewer than two letters were found
///   (and the first one did not decide the month by itself).
static func nextMonthValue(_ buffer: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ finalIndex: inout Int) -> DateParser.Month? {

    // Lots of short-circuits here. Not strict.

    // A range whose lower bound exceeds its upper bound traps, so bail out first.
    guard startingIndex >= 0, startingIndex < numberOfBytes else {
        return nil
    }

    let a = UInt8(ascii: "a")
    let j = UInt8(ascii: "j")
    let m = UInt8(ascii: "m")
    let n = UInt8(ascii: "n")
    let u = UInt8(ascii: "u")
    let y = UInt8(ascii: "y")

    var numberOfAlphaCharactersFound = 0
    var monthCharacters: [UInt8] = [0, 0, 0] // stored lowercased

    for i in startingIndex..<numberOfBytes {

        finalIndex = i
        let ch = buffer[i]

        let isAlphaCharacter = (ch >= 65 && ch <= 90) || (ch >= 97 && ch <= 122) // "A"..."Z", "a"..."z"
        if !isAlphaCharacter {
            if numberOfAlphaCharactersFound < 1 {
                continue // Still looking for the start of the month name.
            }
            break // The month name has ended.
        }

        let lowercased = ch | 0x20 // Lowercases an ASCII letter.
        numberOfAlphaCharactersFound += 1

        if numberOfAlphaCharactersFound == 1 {
            // These initials each belong to exactly one month.
            switch lowercased {
            case UInt8(ascii: "f"):
                return .February
            case UInt8(ascii: "s"):
                return .September
            case UInt8(ascii: "o"):
                return .October
            case UInt8(ascii: "n"):
                return .November
            case UInt8(ascii: "d"):
                return .December
            default:
                break
            }
        }

        monthCharacters[numberOfAlphaCharactersFound - 1] = lowercased
        if numberOfAlphaCharactersFound >= 3 {
            break
        }
    }

    if numberOfAlphaCharactersFound < 2 {
        return nil
    }

    if monthCharacters[0] == j { // Jan, Jun, Jul
        if monthCharacters[1] == a {
            return .January
        }
        if monthCharacters[1] == u {
            return monthCharacters[2] == n ? .June : .July
        }
        return .January
    }

    if monthCharacters[0] == m { // March, May
        return monthCharacters[2] == y ? .May : .March
    }

    if monthCharacters[0] == a { // April, August
        return monthCharacters[1] == u ? .August : .April
    }

    return .January // Should never get here (but possibly do)
}
/// Reads the next run of ASCII digits at or after `startingIndex` and returns its value.
///
/// Non-digits before the run are skipped. Reading stops at the first non-digit after the run
/// has started, or once `maximumNumberOfDigits` digits have been read.
///
/// - Parameters:
///   - bytes: The raw bytes to read from.
///   - numberOfBytes: The number of bytes to consider.
///   - startingIndex: Where to start looking.
///   - maximumNumberOfDigits: The most digits to read; must be between 1 and 4.
///   - finalIndex: Set to the index of the last byte looked at. Left untouched when no byte is looked at.
/// - Returns: The numeric value, or `nil` when no digit was found.
static func nextNumericValue(_ bytes: DateBuffer, numberOfBytes: Int, startingIndex: Int, maximumNumberOfDigits: Int, finalIndex: inout Int) -> Int? {

    // Maximum for the maximum is 4 (for time zone offsets and years)
    assert(maximumNumberOfDigits > 0 && maximumNumberOfDigits <= 4)

    // A range whose lower bound exceeds its upper bound traps, so bail out first.
    guard startingIndex >= 0, startingIndex < numberOfBytes else {
        return nil
    }

    var numberOfDigitsFound = 0
    var value = 0

    for i in startingIndex..<numberOfBytes {

        finalIndex = i
        let ch = bytes[i]

        let isDigit = ch >= 48 && ch <= 57 // "0"..."9"
        if !isDigit {
            if numberOfDigitsFound < 1 {
                continue // Still looking for the first digit.
            }
            break // The run of digits has ended.
        }

        value = (value * 10) + Int(ch - 48) // "0" is 48
        numberOfDigitsFound += 1

        if numberOfDigitsFound >= maximumNumberOfDigits {
            break
        }
    }

    return numberOfDigitsFound > 0 ? value : nil
}
}

View File

@ -0,0 +1,354 @@
////
//// DateParser.swift
////
////
//// Created by Brent Simmons on 8/28/24.
////
//
//import Foundation
//
//private struct TimeZoneSpecifier {
// let abbreviation: String
// let offsetHours: Int
// let offsetMinutes: Int
//
// init(_ abbreviation: String, _ offsetHours: Int, _ offsetMinutes: Int) {
// self.abbreviation = abbreviation
// self.offsetHours = offsetHours
// self.offsetMinutes = offsetMinutes
// }
//}
//
//// See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list
//private let timeZoneTable: [TimeZoneAbbreviationAndOffset] = [
// // Most common at top for performance
// TimeZoneSpecifier("GMT", 0, 0),
// TimeZoneSpecifier("PDT", -7, 0),
// TimeZoneSpecifier("PST", -8, 0),
// TimeZoneSpecifier("EST", -5, 0),
// TimeZoneSpecifier("EDT", -4, 0),
// TimeZoneSpecifier("MDT", -6, 0),
// TimeZoneSpecifier("MST", -7, 0),
// TimeZoneSpecifier("CST", -6, 0),
// TimeZoneSpecifier("CDT", -5, 0),
// TimeZoneSpecifier("ACT", -8, 0),
// TimeZoneSpecifier("AFT", 4, 30),
// TimeZoneSpecifier("AMT", 4, 0),
// TimeZoneSpecifier("ART", -3, 0),
// TimeZoneSpecifier("AST", 3, 0),
// TimeZoneSpecifier("AZT", 4, 0),
// TimeZoneSpecifier("BIT", -12, 0),
// TimeZoneSpecifier("BDT", 8, 0),
// TimeZoneSpecifier("ACST", 9, 30),
// TimeZoneSpecifier("AEST", 10, 0),
// TimeZoneSpecifier("AKST", -9, 0),
// TimeZoneSpecifier("AMST", 5, 0),
// TimeZoneSpecifier("AWST", 8, 0),
// TimeZoneSpecifier("AZOST", -1, 0),
// TimeZoneSpecifier("BIOT", 6, 0),
// TimeZoneSpecifier("BRT", -3, 0),
// TimeZoneSpecifier("BST", 6, 0),
// TimeZoneSpecifier("BTT", 6, 0),
// TimeZoneSpecifier("CAT", 2, 0),
// TimeZoneSpecifier("CCT", 6, 30),
// TimeZoneSpecifier("CET", 1, 0),
// TimeZoneSpecifier("CEST", 2, 0),
// TimeZoneSpecifier("CHAST", 12, 45),
// TimeZoneSpecifier("ChST", 10, 0),
// TimeZoneSpecifier("CIST", -8, 0),
// TimeZoneSpecifier("CKT", -10, 0),
// TimeZoneSpecifier("CLT", -4, 0),
// TimeZoneSpecifier("CLST", -3, 0),
// TimeZoneSpecifier("COT", -5, 0),
// TimeZoneSpecifier("COST", -4, 0),
// TimeZoneSpecifier("CVT", -1, 0),
// TimeZoneSpecifier("CXT", 7, 0),
// TimeZoneSpecifier("EAST", -6, 0),
// TimeZoneSpecifier("EAT", 3, 0),
// TimeZoneSpecifier("ECT", -4, 0),
// TimeZoneSpecifier("EEST", 3, 0),
// TimeZoneSpecifier("EET", 2, 0),
// TimeZoneSpecifier("FJT", 12, 0),
// TimeZoneSpecifier("FKST", -4, 0),
// TimeZoneSpecifier("GALT", -6, 0),
// TimeZoneSpecifier("GET", 4, 0),
// TimeZoneSpecifier("GFT", -3, 0),
// TimeZoneSpecifier("GILT", 7, 0),
// TimeZoneSpecifier("GIT", -9, 0),
// TimeZoneSpecifier("GST", -2, 0),
// TimeZoneSpecifier("GYT", -4, 0),
// TimeZoneSpecifier("HAST", -10, 0),
// TimeZoneSpecifier("HKT", 8, 0),
// TimeZoneSpecifier("HMT", 5, 0),
// TimeZoneSpecifier("IRKT", 8, 0),
// TimeZoneSpecifier("IRST", 3, 30),
// TimeZoneSpecifier("IST", 2, 0),
// TimeZoneSpecifier("JST", 9, 0),
// TimeZoneSpecifier("KRAT", 7, 0),
// TimeZoneSpecifier("KST", 9, 0),
// TimeZoneSpecifier("LHST", 10, 30),
// TimeZoneSpecifier("LINT", 14, 0),
// TimeZoneSpecifier("MAGT", 11, 0),
// TimeZoneSpecifier("MIT", -9, 30),
// TimeZoneSpecifier("MSK", 3, 0),
// TimeZoneSpecifier("MUT", 4, 0),
// TimeZoneSpecifier("NDT", -2, 30),
// TimeZoneSpecifier("NFT", 11, 30),
// TimeZoneSpecifier("NPT", 5, 45),
// TimeZoneSpecifier("NT", -3, 30),
// TimeZoneSpecifier("OMST", 6, 0),
// TimeZoneSpecifier("PETT", 12, 0),
// TimeZoneSpecifier("PHOT", 13, 0),
// TimeZoneSpecifier("PKT", 5, 0),
// TimeZoneSpecifier("RET", 4, 0),
// TimeZoneSpecifier("SAMT", 4, 0),
// TimeZoneSpecifier("SAST", 2, 0),
// TimeZoneSpecifier("SBT", 11, 0),
// TimeZoneSpecifier("SCT", 4, 0),
// TimeZoneSpecifier("SLT", 5, 30),
// TimeZoneSpecifier("SST", 8, 0),
// TimeZoneSpecifier("TAHT", -10, 0),
// TimeZoneSpecifier("THA", 7, 0),
// TimeZoneSpecifier("UYT", -3, 0),
// TimeZoneSpecifier("UYST", -2, 0),
// TimeZoneSpecifier("VET", -4, 30),
// TimeZoneSpecifier("VLAT", 10, 0),
// TimeZoneSpecifier("WAT", 1, 0),
// TimeZoneSpecifier("WET", 0, 0),
// TimeZoneSpecifier("WEST", 1, 0),
// TimeZoneSpecifier("YAKT", 9, 0),
// TimeZoneSpecifier("YEKT", 5, 0)
//]
//
//private enum Month: Int {
// case January = 1, February, March, April, May, June, July, August, September, October, November, December
//}
//
//private func nextMonthValue(bytes: String, startingIndex: Int, finalIndex: inout Int) -> Int? {
//
// // Months are 1-based -- January is 1, Dec is 12.
// // Lots of short-circuits here. Not strict. GIGO
//
// var i = startingIndex
// var numberOfBytes = bytes.count
// var numberOfAlphaCharactersFound = 0
// var monthCharacters = [Character]()
//
// while index < bytes.count {
//
//
// }
//
//
// var index = startingIndex
// var numberOfAlphaCharactersFound = 0
// var monthCharacters: [Character] = []
//
// while index < bytes.count {
// let character = bytes[bytes.index(bytes.startIndex, offsetBy: index)]
//
// if !character.isLetter, numberOfAlphaCharactersFound < 1 {
// index += 1
// continue
// }
// if !character.isLetter, numberOfAlphaCharactersFound > 0 {
// break
// }
//
// numberOfAlphaCharactersFound += 1
// if numberOfAlphaCharactersFound == 1 {
// switch character.lowercased() {
// case "f": return (.February.rawValue, index)
// case "s": return (.September.rawValue, index)
// case "o": return (.October.rawValue, index)
// case "n": return (.November.rawValue, index)
// case "d": return (.December.rawValue, index)
// default: break
// }
// }
//
// monthCharacters.append(character)
// if numberOfAlphaCharactersFound >= 3 {
// break
// }
// index += 1
// }
//
// if numberOfAlphaCharactersFound < 2 {
// return (nil, index)
// }
//
// if monthCharacters[0].lowercased() == "j" {
// if monthCharacters[1].lowercased() == "a" {
// return (.January.rawValue, index)
// }
// if monthCharacters[1].lowercased() == "u" {
// if monthCharacters.count > 2 && monthCharacters[2].lowercased() == "n" {
// return (.June.rawValue, index)
// }
// return (.July.rawValue, index)
// }
// return (.January.rawValue, index)
// }
//
// if monthCharacters[0].lowercased() == "m" {
// if monthCharacters.count > 2 && monthCharacters[2].lowercased() == "y" {
// return (.May.rawValue, index)
// }
// return (.March.rawValue, index)
// }
//
// if monthCharacters[0].lowercased() == "a" {
// if monthCharacters[1].lowercased() == "u" {
// return (.August.rawValue, index)
// }
// return (.April.rawValue, index)
// }
//
// return (.January.rawValue, index)
//}
//
//func nextNumericValue(bytes: String, startingIndex: Int, maximumNumberOfDigits: Int) -> (Int?, Int) {
// let digits = bytes.dropFirst(startingIndex).prefix(maximumNumberOfDigits)
// guard let value = Int(digits) else {
// return (nil, startingIndex)
// }
// return (value, startingIndex + digits.count)
//}
//
//func hasAtLeastOneAlphaCharacter(_ s: String) -> Bool {
// return s.contains { $0.isLetter }
//}
//
//func offsetInSeconds(forTimeZoneAbbreviation abbreviation: String) -> Int {
// for zone in timeZoneTable {
// if zone.abbreviation.caseInsensitiveCompare(abbreviation) == .orderedSame {
// if zone.offsetHours < 0 {
// return (zone.offsetHours * 3600) - (zone.offsetMinutes * 60)
// }
// return (zone.offsetHours * 3600) + (zone.offsetMinutes * 60)
// }
// }
// return 0
//}
//
//func offsetInSeconds(forOffsetCharacters timeZoneCharacters: String) -> Int {
// let isPlus = timeZoneCharacters.hasPrefix("+")
// let numericValue = timeZoneCharacters.filter { $0.isNumber || $0 == "-" }
// let (hours, finalIndex) = nextNumericValue(bytes: numericValue, startingIndex: 0, maximumNumberOfDigits: 2)
// let (minutes, _) = nextNumericValue(bytes: numericValue, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2)
//
// let seconds = ((hours ?? 0) * 3600) + ((minutes ?? 0) * 60)
// return isPlus ? seconds : -seconds
//}
//
//func parsedTimeZoneOffset(bytes: String, startingIndex: Int) -> Int {
// var timeZoneCharacters: String = ""
// var numberOfCharactersFound = 0
// var i = startingIndex
//
// while i < bytes.count, numberOfCharactersFound < 5 {
// let character = bytes[bytes.index(bytes.startIndex, offsetBy: i)]
// if character != ":" && character != " " {
// timeZoneCharacters.append(character)
// numberOfCharactersFound += 1
// }
// i += 1
// }
//
// if numberOfCharactersFound < 1 || timeZoneCharacters.lowercased() == "z" {
// return 0
// }
//
// if timeZoneCharacters.range(of: "GMT", options: .caseInsensitive) != nil ||
// timeZoneCharacters.range(of: "UTC", options: .caseInsensitive) != nil {
// return 0
// }
//
// if hasAtLeastOneAlphaCharacter(timeZoneCharacters) {
// return offsetInSeconds(forTimeZoneAbbreviation: timeZoneCharacters)
// }
// return offsetInSeconds(forOffsetCharacters: timeZoneCharacters)
//}
//
//func dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(
// year: Int, month: Int, day: Int,
// hour: Int, minute: Int, second: Int,
// milliseconds: Int, timeZoneOffset: Int) -> Date? {
//
// var dateComponents = DateComponents()
// dateComponents.year = year
// dateComponents.month = month
// dateComponents.day = day
// dateComponents.hour = hour
// dateComponents.minute = minute
// dateComponents.second = second
// dateComponents.timeZone = TimeZone(secondsFromGMT: timeZoneOffset)
//
// let calendar = Calendar.current
// return calendar.date(from: dateComponents)
//}
//
//func parsePubDate(bytes: String) -> Date? {
// let (day, finalIndex) = nextNumericValue(bytes: bytes, startingIndex: 0, maximumNumberOfDigits: 2)
// let (month, finalIndex2) = nextMonthValue(bytes: bytes, startingIndex: finalIndex + 1)
// let (year, finalIndex3) = nextNumericValue(bytes: bytes, startingIndex: finalIndex2 + 1, maximumNumberOfDigits: 4)
// let (hour, finalIndex4) = nextNumericValue(bytes: bytes, startingIndex: finalIndex3 + 1, maximumNumberOfDigits: 2)
// let (minute, finalIndex5) = nextNumericValue(bytes: bytes, startingIndex: finalIndex4 + 1, maximumNumberOfDigits: 2)
//
// var second = 0
// let currentIndex = finalIndex5 + 1
// if currentIndex < bytes.count, bytes[bytes.index(bytes.startIndex, offsetBy: currentIndex)] == ":" {
// second = nextNumericValue(bytes: bytes, startingIndex: currentIndex, maximumNumberOfDigits: 2).0 ?? 0
// }
//
// let timeZoneOffset = parsedTimeZoneOffset(bytes: bytes, startingIndex: currentIndex + 1)
//
// return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(
// year: year ?? 1970,
// month: month ?? RSMonth.January.rawValue,
// day: day ?? 1,
// hour: hour ?? 0,
// minute: minute ?? 0,
// second: second,
// milliseconds: 0,
// timeZoneOffset: timeZoneOffset
// )
//}
//
//func parseW3C(bytes: String) -> Date? {
// let (year, finalIndex) = nextNumericValue(bytes: bytes, startingIndex: 0, maximumNumberOfDigits: 4)
// let (month, finalIndex2) = nextNumericValue(bytes: bytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2)
// let (day, finalIndex3) = nextNumericValue(bytes: bytes, startingIndex: finalIndex2 + 1, maximumNumberOfDigits: 2)
// let (hour, finalIndex4) = nextNumericValue(bytes: bytes, startingIndex: finalIndex3 + 1, maximumNumberOfDigits: 2)
// let (minute, finalIndex5) = nextNumericValue(bytes: bytes, startingIndex: finalIndex4 + 1, maximumNumberOfDigits: 2)
// let (second, finalIndex6) = nextNumericValue(bytes: bytes, startingIndex: finalIndex5 + 1, maximumNumberOfDigits: 2)
//
// var milliseconds = 0
// let currentIndex = finalIndex6 + 1
// if currentIndex < bytes.count, bytes[bytes.index(bytes.startIndex, offsetBy: currentIndex)] == "." {
// milliseconds = nextNumericValue(bytes: bytes, startingIndex: currentIndex + 1, maximumNumberOfDigits: 3).0 ?? 0
// }
//
// let timeZoneOffset = parsedTimeZoneOffset(bytes: bytes, startingIndex: currentIndex + 1)
//
// return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(
// year: year ?? 1970,
// month: month ?? RSMonth.January.rawValue,
// day: day ?? 1,
// hour: hour ?? 0,
// minute: minute ?? 0,
// second: second ?? 0,
// milliseconds: milliseconds,
// timeZoneOffset: timeZoneOffset
// )
//}
//
//func dateWithBytes(bytes: String) -> Date? {
// guard !bytes.isEmpty else { return nil }
//
// if bytes.range(of: "-") != nil {
// return parseW3C(bytes: bytes)
// }
// return parsePubDate(bytes: bytes)
//}

View File

@ -7,6 +7,7 @@
//
import Foundation
import SAX
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
// You don't need to know the type of feed.

View File

@ -7,6 +7,7 @@
//
import Foundation
import SAX
public enum FeedType: Sendable {
case rss

View File

@ -7,6 +7,7 @@
//
import Foundation
import SAX
// See https://jsonfeed.org/version/1.1

View File

@ -7,6 +7,7 @@
//
import Foundation
import SAX
// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
// Also: http://cyber.harvard.edu/rss/rss.html

View File

@ -8,7 +8,7 @@
import Foundation
public struct ParsedItem: Hashable, Sendable {
public final class ParsedItem: Hashable, Sendable {
public let syncServiceID: String? //Nil when not syncing
public let uniqueID: String //RSS guid, for instance; may be calculated

View File

@ -7,6 +7,7 @@
//
import Foundation
import SAX
// RSSParser wraps the Objective-C RSAtomParser.
//

View File

@ -0,0 +1,111 @@
//
// RSSArticle.swift
//
//
// Created by Brent Simmons on 8/27/24.
//
import Foundation
import FoundationExtras
/// A mutable article record with its identifying fields, content, authors, enclosures, and dates.
final class RSSArticle {

    var feedURL: String

    /// An RSS guid, if present, or calculated from other attributes.
    /// Should be unique to the feed, but not necessarily unique
    /// across different feeds. (Not suitable for a database ID.)
    ///
    /// Evaluated once, on first access.
    lazy var articleID: String = {
        guid ?? calculatedArticleID()
    }()

    var guid: String?
    var title: String?
    var body: String?
    var link: String?
    var permalink: String?
    var authors: [RSSAuthor]?
    var enclosures: [RSSEnclosure]?
    var datePublished: Date?
    var dateModified: Date?
    /// Set to the current date when the article is created.
    var dateParsed: Date
    var language: String?

    init(_ feedURL: String) {
        self.feedURL = feedURL
        self.dateParsed = Date()
    }

    /// Appends `enclosure`, creating the enclosures array on first use.
    func addEnclosure(_ enclosure: RSSEnclosure) {
        var updatedEnclosures = enclosures ?? []
        updatedEnclosures.append(enclosure)
        enclosures = updatedEnclosures
    }

    /// Appends `author`, creating the authors array on first use.
    func addAuthor(_ author: RSSAuthor) {
        var updatedAuthors = authors ?? []
        updatedAuthors.append(author)
        authors = updatedAuthors
    }
}
private extension RSSArticle {

    /// Builds an article ID from other properties when there is no guid, then hashes it with `md5String`.
    ///
    /// The result is meant to be unique inside a feed, not globally unique,
    /// so it is not suitable for a database ID.
    func calculatedArticleID() -> String {

        // In general, feeds should have guids. When they don't, re-runs are very likely,
        // because there's no other 100% reliable way to determine identity.

        func nonEmpty(_ value: String?) -> String? {
            guard let value, !value.isEmpty else {
                return nil
            }
            return value
        }

        let datePublishedTimeStampString = datePublished.map { String(format: "%.0f", $0.timeIntervalSince1970) }

        // Preference order: permalink, then link, then title.
        let bestIdentifier = nonEmpty(permalink) ?? nonEmpty(link) ?? nonEmpty(title)

        let stringToHash: String
        if let datePublishedTimeStampString {
            // Ideally we have a permalink and a pubDate.
            // Either one would probably be a good guid, but together they should be rock-solid.
            // (In theory. Feeds are buggy, though.)
            // With nothing else to go on, the timestamp is used by itself.
            stringToHash = (bestIdentifier ?? "") + datePublishedTimeStampString
        } else {
            // No date: use the best identifier alone, then the body, then an empty string.
            stringToHash = bestIdentifier ?? nonEmpty(body) ?? ""
        }

        return stringToHash.md5String
    }
}

View File

@ -0,0 +1,35 @@
//
// RSSAuthor.swift
//
//
// Created by Brent Simmons on 8/27/24.
//
import Foundation
/// An author attached to an RSS article. All fields are optional.
final class RSSAuthor {

	var name: String?
	var url: String?
	var avatarURL: String?
	var emailAddress: String?

	/// Memberwise initializer.
	init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) {
		self.emailAddress = emailAddress
		self.avatarURL = avatarURL
		self.url = url
		self.name = name
	}

	/// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.)
	///
	/// A string containing "@" is taken to be an email address; otherwise a string starting with
	/// "http" (case-insensitive) is taken to be a URL; anything else is treated as a name.
	convenience init(singleString: String) {
		let looksLikeEmail = singleString.contains("@")
		// The email check wins, so a URL containing "@" is still classified as an email address.
		let looksLikeURL = !looksLikeEmail && singleString.lowercased().hasPrefix("http")
		let looksLikeName = !looksLikeEmail && !looksLikeURL

		self.init(
			name: looksLikeName ? singleString : nil,
			url: looksLikeURL ? singleString : nil,
			avatarURL: nil,
			emailAddress: looksLikeEmail ? singleString : nil)
	}
}

View File

@ -0,0 +1,20 @@
//
// RSSEnclosure.swift
//
//
// Created by Brent Simmons on 8/27/24.
//
import Foundation
/// An enclosure attached to an RSS article. Only `url` is required; the remaining fields start out nil.
final class RSSEnclosure {
var url: String
// NOTE(review): presumably the size in bytes, as in the RSS enclosure `length` attribute — confirm against the parser code that fills it in.
var length: Int?
var mimeType: String?
var title: String?
/// Creates an enclosure with the given URL string and no other metadata.
init(url: String) {
self.url = url
}
}

View File

@ -0,0 +1,22 @@
//
// RSSFeed.swift
//
//
// Created by Brent Simmons on 8/27/24.
//
import Foundation
/// Mutable container for feed-level values. Everything except `urlString` starts out nil.
final class RSSFeed {
/// The URL string passed to `init(urlString:)`.
var urlString: String
var title: String?
var link: String?
var language: String?
var articles: [RSSArticle]?
/// Creates an empty feed identified by `urlString`.
init(urlString: String) {
self.urlString = urlString
}
}

View File

@ -11,12 +11,205 @@ import SAX
/// Parses RSS data and collects the result in an `RSSFeed`.
///
/// State is accumulated while the parser's SAX delegate callbacks run: `feed` receives
/// channel-level values, and `articles` receives one `RSSArticle` per item.
public final class RSSParser {

	// NOTE(review): not referenced by any code visible in this file — confirm whether it is still needed.
	private var parseFeed: ParsedFeed?
	private var parserData: ParserData

	/// URL string of the feed being parsed.
	private var feedURL: String {
		parserData.url
	}

	/// Raw feed data to parse.
	private var data: Data {
		parserData.data
	}

	private let feed: RSSFeed
	private var articles = [RSSArticle]()

	/// The article most recently added to `articles`, or nil when there is none yet.
	private var currentArticle: RSSArticle? {
		articles.last
	}

	// Parsing state, updated by the SAX delegate callbacks.
	private var endRSSFound = false
	private var isRDF = false
	private var parsingArticle = false
	private var parsingChannelImage = false
	private var parsingAuthor = false
	private var currentAttributes: XMLAttributesDictionary?

	/// Parses `parserData` and returns the resulting feed.
	///
	/// - Parameter parserData: The feed URL and data to parse.
	/// - Returns: The feed object populated during parsing.
	// NOTE(review): `RSSFeed` is declared without `public` in this file set, which a public
	// method cannot return — confirm the intended access levels.
	public static func parsedFeed(with parserData: ParserData) -> RSSFeed {

		let parser = RSSParser(parserData)
		parser.parse()
		return parser.feed
	}

	/// Creates a parser for `parserData` with an empty feed keyed by its URL.
	init(_ parserData: ParserData) {
		self.parserData = parserData
		self.feed = RSSFeed(urlString: parserData.url)
	}
}
private extension RSSParser {

	/// Element and prefix names, stored as null-terminated UTF-8 so they can be compared
	/// against the parser-supplied `XMLPointer` values with `SAXEqualTags`.
	private struct XMLName {
		static let uppercaseRDF = "RDF".utf8CString
		static let item = "item".utf8CString
		static let guid = "guid".utf8CString
		static let enclosure = "enclosure".utf8CString
		static let rdfAbout = "rdf:about".utf8CString
		static let image = "image".utf8CString
		static let author = "author".utf8CString
		static let rss = "rss".utf8CString
		static let link = "link".utf8CString
		static let title = "title".utf8CString
		static let language = "language".utf8CString
		static let pubDate = "pubDate".utf8CString
		static let description = "description".utf8CString
		static let dc = "dc".utf8CString
		static let content = "content".utf8CString
		static let encoded = "encoded".utf8CString
	}

	/// Stores a channel-level element (link, title, language) on `feed`.
	///
	/// Elements with a namespace prefix are ignored.
	func addFeedElement(_ localName: XMLPointer, _ prefix: XMLPointer?) {

		guard prefix == nil else {
			return
		}

		if SAXEqualTags(localName, XMLName.link) {
			// Keep the first link found; later ones do not overwrite it.
			if feed.link == nil {
				feed.link = currentString
			}
		}
		else if SAXEqualTags(localName, XMLName.title) {
			feed.title = currentString
		}
		else if SAXEqualTags(localName, XMLName.language) {
			feed.language = currentString
		}
	}

	/// Starts a new article; it becomes `currentArticle`.
	func addArticle() {
		let article = RSSArticle(feedURL)
		articles.append(article)
	}

	/// Stores an item-level element on `currentArticle`.
	///
	/// `dc:` elements are handed to `addDCElement`, `content:encoded` sets the body, and any
	/// other prefixed element is ignored.
	func addArticleElement(_ localName: XMLPointer, _ prefix: XMLPointer?) {

		if SAXEqualTags(prefix, XMLName.dc) {
			addDCElement(localName)
			return
		}

		// `currentArticle` is optional (nil while `articles` is empty), so writes go through
		// optional chaining and are no-ops when there is no article.
		if SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) {
			if let currentString, !currentString.isEmpty {
				currentArticle?.body = currentString
			}
			return
		}

		guard prefix == nil else {
			return
		}

		if SAXEqualTags(localName, XMLName.guid) {
			addGuid()
		}
		else if SAXEqualTags(localName, XMLName.pubDate) {
			currentArticle?.datePublished = currentDate
		}
		else if SAXEqualTags(localName, XMLName.author) {
			addAuthorWithString(currentString)
		}
		else if SAXEqualTags(localName, XMLName.link) {
			currentArticle?.link = urlString(currentString)
		}
		else if SAXEqualTags(localName, XMLName.description) {
			// content:encoded wins: description only fills the body when nothing has set it yet.
			if currentArticle?.body == nil {
				currentArticle?.body = currentString
			}
		}
		else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) {
			// A title found while inside an author element is not the article's title.
			if let currentString {
				currentArticle?.title = currentString
			}
		}
		else if SAXEqualTags(localName, XMLName.enclosure) {
			addEnclosure()
		}
	}
}
extension RSSParser: SAXParserDelegate {

	/// Handles the start of an element: updates the parsing state flags, creates a new article
	/// for each unprefixed `item`, and starts character collection unless inside a channel image.
	public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {

		if endRSSFound {
			return
		}

		if SAXEqualTags(localName, XMLName.uppercaseRDF) {
			isRDF = true
			return
		}

		// Attributes are only read for RDF items, guid, and enclosure elements.
		var xmlAttributes: XMLAttributesDictionary? = nil
		if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(localName, XMLName.enclosure) {
			xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount)
		}
		if currentAttributes != xmlAttributes {
			currentAttributes = xmlAttributes
		}

		if prefix == nil && SAXEqualTags(localName, XMLName.item) {
			addArticle()
			parsingArticle = true

			// The attributes dictionary is keyed by String, so look the attribute up by its
			// string name rather than by the C-string constant.
			// NOTE(review): assumes prefixed attributes are keyed as "prefix:name" — confirm
			// against SAXParser.attributesDictionary.
			let rdfAboutKey = "rdf:about"
			if isRDF, let rdfGuid = xmlAttributes?[rdfAboutKey], let currentArticle { // RSS 1.0 guid
				currentArticle.guid = rdfGuid
				currentArticle.permalink = rdfGuid
			}
		}
		else if prefix == nil && SAXEqualTags(localName, XMLName.image) {
			parsingChannelImage = true
		}
		else if prefix == nil && SAXEqualTags(localName, XMLName.author) {
			if parsingArticle {
				parsingAuthor = true
			}
		}

		if !parsingChannelImage {
			saxParser.beginStoringCharacters()
		}
	}

	/// Handles the end of an element: detects the end of the feed, clears the state flags, and
	/// routes completed elements to the article or the feed.
	public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {

		if endRSSFound {
			return
		}

		if isRDF && SAXEqualTags(localName, XMLName.uppercaseRDF) {
			endRSSFound = true
		}
		else if SAXEqualTags(localName, XMLName.rss) {
			endRSSFound = true
		}
		else if SAXEqualTags(localName, XMLName.image) {
			parsingChannelImage = false
		}
		else if SAXEqualTags(localName, XMLName.item) {
			parsingArticle = false
		}
		else if parsingArticle {
			addArticleElement(localName, prefix)
			// Clear the flag only after the element has been added, so that
			// addArticleElement still sees it set for the author element itself.
			if SAXEqualTags(localName, XMLName.author) {
				parsingAuthor = false
			}
		}
		else if !parsingChannelImage {
			addFeedElement(localName, prefix)
		}
	}

	public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {
		// Required method.
	}
}

View File

@ -22,11 +22,6 @@ public final class OPMLParser {
itemStack.last
}
struct XMLKey {
static let title = "title".utf8CString
static let outline = "outline".utf8CString
}
/// Returns nil if data can't be parsed (if it's not OPML).
public static func document(with parserData: ParserData) -> OPMLDocument? {
@ -36,7 +31,6 @@ public final class OPMLParser {
}
/// Creates a parser that holds on to `parserData`.
init(_ parserData: ParserData) {
self.parserData = parserData
}
}
@ -79,14 +73,19 @@ private extension OPMLParser {
extension OPMLParser: SAXParserDelegate {
/// Element names this delegate checks for with `SAXEqualTags`, stored as null-terminated UTF-8.
private struct XMLName {
static let title = "title".utf8CString
static let outline = "outline".utf8CString
}
public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
if SAXEqualTags(localName, XMLKey.title) {
if SAXEqualTags(localName, XMLName.title) {
saxParser.beginStoringCharacters()
return
}
if !SAXEqualTags(localName, XMLKey.outline) {
if !SAXEqualTags(localName, XMLName.outline) {
return
}
@ -99,7 +98,7 @@ extension OPMLParser: SAXParserDelegate {
public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {
if SAXEqualTags(localName, XMLKey.title) {
if SAXEqualTags(localName, XMLName.title) {
if let item = currentItem as? OPMLDocument {
item.title = saxParser.currentStringWithTrimmedWhitespace
}
@ -107,7 +106,7 @@ extension OPMLParser: SAXParserDelegate {
return
}
if SAXEqualTags(localName, XMLKey.outline) {
if SAXEqualTags(localName, XMLName.outline) {
popItem()
}
}

View File

@ -91,7 +91,9 @@ public final class SAXParser {
characters.count = 0
}
public func attributesDictionary(_ attributes: UnsafePointer<XMLPointer?>?, attributeCount: Int) -> [String: String]? {
public typealias XMLAttributesDictionary = [String: String]
public func attributesDictionary(_ attributes: UnsafePointer<XMLPointer?>?, attributeCount: Int) -> XMLAttributesDictionary? {
guard attributeCount > 0, let attributes else {
return nil

View File

@ -0,0 +1,116 @@
//
// RSDateParserTests.swift
//
//
// Created by Maurice Parker on 4/1/21.
//
import Foundation
import XCTest
@testable import DateParser
/// Tests for parsing date strings into `Date` values, using RSS-style strings
/// (e.g. "Fri, 28 May 2010 21:03:38 +0000") and Atom-style strings (e.g. "2010-05-28T21:03:38Z").
class DateParserTests: XCTestCase {

	/// Builds the expected `Date` from calendar fields interpreted in GMT.
	///
	/// A fixed Gregorian calendar is used instead of `Calendar.current`, so the expected values
	/// do not change with the calendar configured on the machine running the tests.
	func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int) -> Date {
		let components = DateComponents(
			calendar: Calendar(identifier: .gregorian),
			timeZone: TimeZone(secondsFromGMT: 0),
			year: year,
			month: month,
			day: day,
			hour: hour,
			minute: minute,
			second: second)

		guard let date = components.date else {
			preconditionFailure("Could not build a date from \(components)")
		}
		return date
	}

	/// Checks a range of date formats and time zone notations against their expected GMT values.
	func testDateWithString() {

		let may28 = dateWithValues(2010, 5, 28, 21, 3, 38)

		let cases: [(string: String, expected: Date)] = [
			("Fri, 28 May 2010 21:03:38 +0000", may28),
			("Fri, 28 May 2010 21:03:38 +00:00", may28),
			("Fri, 28 May 2010 21:03:38 -00:00", may28),
			("Fri, 28 May 2010 21:03:38 -0000", may28),
			("Fri, 28 May 2010 21:03:38 GMT", may28),
			("2010-05-28T21:03:38+00:00", may28),
			("2010-05-28T21:03:38+0000", may28),
			("2010-05-28T21:03:38-0000", may28),
			("2010-05-28T21:03:38-00:00", may28),
			("2010-05-28T21:03:38Z", may28),
			("2010-07-13T17:06:40+00:00", dateWithValues(2010, 7, 13, 17, 6, 40)),
			("30 Apr 2010 5:00 PDT", dateWithValues(2010, 4, 30, 12, 0, 0)),
			("21 May 2010 21:22:53 GMT", dateWithValues(2010, 5, 21, 21, 22, 53)),
			("Wed, 09 Jun 2010 00:00 EST", dateWithValues(2010, 6, 9, 5, 0, 0)),
			("Wed, 23 Jun 2010 03:43:50 Z", dateWithValues(2010, 6, 23, 3, 43, 50)),
			("2010-06-22T03:57:49+00:00", dateWithValues(2010, 6, 22, 3, 57, 49)),
			("2010-11-17T08:40:07-05:00", dateWithValues(2010, 11, 17, 13, 40, 07))
		]

		for (string, expected) in cases {
			// The message names the failing input, since every case shares this one assertion.
			XCTAssertEqual(date(string), expected, "Unexpected result for \"\(string)\"")
		}
	}

	/// A space instead of "T" between date and time should still parse.
	func testAtomDateWithMissingTCharacter() {
		let expectedDateResult = dateWithValues(2010, 11, 17, 13, 40, 07)
		let d = date("2010-11-17 08:40:07-05:00")
		XCTAssertEqual(d, expectedDateResult)
	}

	/// Fractional seconds of all zeros followed by "Z".
	func testFeedbinDate() {
		let expectedDateResult = dateWithValues(2019, 9, 27, 21, 01, 48)
		let d = date("2019-09-27T21:01:48.000000Z")
		XCTAssertEqual(d, expectedDateResult)
	}

	/// Non-zero fractional seconds are expected to be dropped: the result is the whole second.
	func testHighMillisecondDate() {
		let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56)
		let d = date("2021-03-29T10:46:56.516941+00:00")
		XCTAssertEqual(d, expectedDateResult)
	}
}
private extension DateParserTests {

	/// Feeds the UTF-8 bytes of `string` to `Date(data:)` and returns the result.
	func date(_ string: String) -> Date? {
		Date(data: Data(string.utf8))
	}
}

View File

@ -1,109 +0,0 @@
//
// RSDateParserTests.swift
//
//
// Created by Maurice Parker on 4/1/21.
//
import Foundation
import XCTest
import Parser
import ParserObjC
/// Tests for `RSDateWithString`, using RSS-style and Atom-style date strings.
class RSDateParserTests: XCTestCase {

	/// Builds the expected `Date` from calendar fields interpreted in GMT.
	///
	/// A fixed Gregorian calendar is used instead of `Calendar.current`, so the expected values
	/// do not change with the calendar configured on the machine running the tests.
	static func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int) -> Date {
		let components = DateComponents(
			calendar: Calendar(identifier: .gregorian),
			timeZone: TimeZone(secondsFromGMT: 0),
			year: year,
			month: month,
			day: day,
			hour: hour,
			minute: minute,
			second: second)

		guard let date = components.date else {
			preconditionFailure("Could not build a date from \(components)")
		}
		return date
	}

	/// Checks a range of date formats and time zone notations against their expected GMT values.
	func testDateWithString() {

		let may28 = Self.dateWithValues(2010, 5, 28, 21, 3, 38)

		let cases: [(string: String, expected: Date)] = [
			("Fri, 28 May 2010 21:03:38 +0000", may28),
			("Fri, 28 May 2010 21:03:38 +00:00", may28),
			("Fri, 28 May 2010 21:03:38 -00:00", may28),
			("Fri, 28 May 2010 21:03:38 -0000", may28),
			("Fri, 28 May 2010 21:03:38 GMT", may28),
			("2010-05-28T21:03:38+00:00", may28),
			("2010-05-28T21:03:38+0000", may28),
			("2010-05-28T21:03:38-0000", may28),
			("2010-05-28T21:03:38-00:00", may28),
			("2010-05-28T21:03:38Z", may28),
			("2010-07-13T17:06:40+00:00", Self.dateWithValues(2010, 7, 13, 17, 6, 40)),
			("30 Apr 2010 5:00 PDT", Self.dateWithValues(2010, 4, 30, 12, 0, 0)),
			("21 May 2010 21:22:53 GMT", Self.dateWithValues(2010, 5, 21, 21, 22, 53)),
			("Wed, 09 Jun 2010 00:00 EST", Self.dateWithValues(2010, 6, 9, 5, 0, 0)),
			("Wed, 23 Jun 2010 03:43:50 Z", Self.dateWithValues(2010, 6, 23, 3, 43, 50)),
			("2010-06-22T03:57:49+00:00", Self.dateWithValues(2010, 6, 22, 3, 57, 49)),
			("2010-11-17T08:40:07-05:00", Self.dateWithValues(2010, 11, 17, 13, 40, 07))
		]

		for (string, expected) in cases {
			// The message names the failing input, since every case shares this one assertion.
			XCTAssertEqual(RSDateWithString(string), expected, "Unexpected result for \"\(string)\"")
		}
	}

	/// A space instead of "T" between date and time should still parse.
	func testAtomDateWithMissingTCharacter() {
		let expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07)
		let d = RSDateWithString("2010-11-17 08:40:07-05:00")
		XCTAssertEqual(d, expectedDateResult)
	}

	/// Fractional seconds of all zeros followed by "Z".
	func testFeedbinDate() {
		let expectedDateResult = Self.dateWithValues(2019, 9, 27, 21, 01, 48)
		let d = RSDateWithString("2019-09-27T21:01:48.000000Z")
		XCTAssertEqual(d, expectedDateResult)
	}

	// Disabled in the original source; kept as-is.
	// func testHighMillisecondDate() {
	// let expectedDateResult = Self.dateWithValues(2021, 03, 29, 10, 46, 56)
	// let d = RSDateWithString("2021-03-29T10:46:56.516941+00:00")
	// XCTAssertEqual(d, expectedDateResult)
	// }
}