Continue progress on porting feed parsers.

This commit is contained in:
Brent Simmons 2024-09-09 21:35:51 -07:00
parent 24e7eb90f6
commit eeb27475de
7 changed files with 96 additions and 93 deletions

View File

@ -335,7 +335,7 @@ private extension DateParser {
timeInfo.tm_gmtoff = 0; timeInfo.tm_gmtoff = 0;
timeInfo.tm_zone = nil; timeInfo.tm_zone = nil;
var rawTime = timegm(&timeInfo) - timeZoneOffset let rawTime = timegm(&timeInfo) - timeZoneOffset
if rawTime == time_t(UInt32.max) { if rawTime == time_t(UInt32.max) {
// NSCalendar is super-amazingly slow (which is partly why this parser exists), // NSCalendar is super-amazingly slow (which is partly why this parser exists),
@ -363,7 +363,7 @@ private extension DateParser {
timeInterval += TimeInterval(TimeInterval(milliseconds) / 1000.0) timeInterval += TimeInterval(TimeInterval(milliseconds) / 1000.0)
} }
return Date(timeIntervalSince1970: TimeInterval(timeInterval)) return Date(timeIntervalSince1970: timeInterval)
} }
// MARK: - Time Zones and Offsets // MARK: - Time Zones and Offsets

View File

@ -12,8 +12,8 @@ import SAX
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. // FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
// You don't need to know the type of feed. // You don't need to know the type of feed.
//public struct FeedParser { public struct FeedParser {
//
// public static func canParse(_ parserData: ParserData) -> Bool { // public static func canParse(_ parserData: ParserData) -> Bool {
// //
// let type = feedType(parserData) // let type = feedType(parserData)
@ -25,9 +25,13 @@ import SAX
// return false // return false
// } // }
// } // }
//
// public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? { public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
//
let rssFeed = RSSParser.parsedFeed(with: parserData)
let parsedFeed = RSSFeedTransformer.parsedFeed(with: rssFeed)
return parsedFeed
// let type = feedType(parserData) // let type = feedType(parserData)
// //
// switch type { // switch type {
@ -47,8 +51,8 @@ import SAX
// case .unknown, .notAFeed: // case .unknown, .notAFeed:
// return nil // return nil
// } // }
// } }
//
// /// For unit tests measuring performance. // /// For unit tests measuring performance.
// public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? { // public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? {
// //
@ -72,5 +76,5 @@ import SAX
// return nil // return nil
// } // }
// } // }
//
//} }

View File

@ -8,7 +8,7 @@
import Foundation import Foundation
public class ParsedFeed: Sendable { public final class ParsedFeed: Sendable {
public let type: FeedType public let type: FeedType
public let title: String? public let title: String?

View File

@ -66,7 +66,7 @@ public final class ParsedItem: Hashable, Sendable {
public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool { public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool {
lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title && lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments
} }
} }

View File

@ -1,77 +0,0 @@
//
// RSParsedFeedTransformer.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
// They create an RSParsedFeed object and related Objective-C objects.
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
// which is part of providing a single API for feed parsing.
//struct RSParsedFeedTransformer {
//
// static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {
//
// let items = parsedItems(rsParsedFeed.articles)
// return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
// }
//}
//
//private extension RSParsedFeedTransformer {
//
// static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> Set<ParsedItem> {
//
// // Create Set<ParsedItem> from Set<RSParsedArticle>
//
// return Set(parsedArticles.map(parsedItem))
// }
//
// static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {
//
// let uniqueID = parsedArticle.articleID
// let url = parsedArticle.permalink
// let externalURL = parsedArticle.link
// let title = parsedArticle.title
// let language = parsedArticle.language
// let contentHTML = parsedArticle.body
// let datePublished = parsedArticle.datePublished
// let dateModified = parsedArticle.dateModified
// let authors = parsedAuthors(parsedArticle.authors)
// let attachments = parsedAttachments(parsedArticle.enclosures)
//
// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments)
// }
//
// static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {
//
// guard let authors = authors, !authors.isEmpty else {
// return nil
// }
//
// let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in
// return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
// }
//
// return transformedAuthors.isEmpty ? nil : Set(transformedAuthors)
// }
//
// static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {
//
// guard let enclosures = enclosures, !enclosures.isEmpty else {
// return nil
// }
//
// let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in
//
// let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil
// return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
// }
//
// return attachments.isEmpty ? nil : Set(attachments)
// }
//}

View File

@ -0,0 +1,75 @@
//
// RSSFeedTransformer.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// Converts the RSS parser's internal feed model into the public `ParsedFeed` model.
struct RSSFeedTransformer {

	/// Builds a public `ParsedFeed` of type `.rss` from an internal `RSSFeed`.
	///
	/// The title, link, URL string, and language are copied from `rssFeed`, and its
	/// articles are converted to `ParsedItem` values. Every other `ParsedFeed` field
	/// is passed as nil, and `expired` is always false.
	///
	/// - Parameter rssFeed: The feed produced by the RSS parser.
	/// - Returns: The equivalent `ParsedFeed`.
	static func parsedFeed(with rssFeed: RSSFeed) -> ParsedFeed {
		ParsedFeed(
			type: .rss,
			title: rssFeed.title,
			homePageURL: rssFeed.link,
			feedURL: rssFeed.urlString,
			language: rssFeed.language,
			feedDescription: nil,
			nextURL: nil,
			iconURL: nil,
			faviconURL: nil,
			authors: nil,
			expired: false,
			hubs: nil,
			items: parsedItems(rssFeed.articles)
		)
	}
}
// MARK: - Private

private extension RSSFeedTransformer {

	/// Converts the feed's articles into a set of `ParsedItem`s.
	///
	/// - Parameter articles: The parsed articles, or nil.
	/// - Returns: One `ParsedItem` per article, collected into a set; an empty set when `articles` is nil.
	static func parsedItems(_ articles: [RSSArticle]?) -> Set<ParsedItem> {
		let sourceArticles = articles ?? []
		return Set(sourceArticles.map { parsedItem($0) })
	}

	/// Converts a single `RSSArticle` into a `ParsedItem`.
	///
	/// The article's `articleID` becomes the unique ID, `permalink` becomes `url`,
	/// `link` becomes `externalURL`, and `body` becomes `contentHTML`. Fields with no
	/// counterpart on the article are passed as nil.
	static func parsedItem(_ article: RSSArticle) -> ParsedItem {
		ParsedItem(
			syncServiceID: nil,
			uniqueID: article.articleID,
			feedURL: article.feedURL,
			url: article.permalink,
			externalURL: article.link,
			title: article.title,
			language: article.language,
			contentHTML: article.body,
			contentText: nil,
			summary: nil,
			imageURL: nil,
			bannerImageURL: nil,
			datePublished: article.datePublished,
			dateModified: article.dateModified,
			authors: parsedAuthors(article.authors),
			tags: nil,
			attachments: parsedAttachments(article.enclosures)
		)
	}

	/// Converts RSS authors into a set of `ParsedAuthor`s.
	///
	/// - Parameter authors: The article's authors, or nil.
	/// - Returns: The converted authors, or nil when the input is nil or empty, or when nothing was converted.
	static func parsedAuthors(_ authors: [RSSAuthor]?) -> Set<ParsedAuthor>? {
		guard let authors, !authors.isEmpty else {
			return nil
		}

		let converted: [ParsedAuthor] = authors.compactMap { rssAuthor in
			ParsedAuthor(name: rssAuthor.name, url: rssAuthor.url, avatarURL: nil, emailAddress: rssAuthor.emailAddress)
		}

		guard !converted.isEmpty else {
			return nil
		}
		return Set(converted)
	}

	/// Converts RSS enclosures into a set of `ParsedAttachment`s.
	///
	/// - Parameter enclosures: The article's enclosures, or nil.
	/// - Returns: The converted attachments, or nil when the input is nil or empty, or when nothing was converted.
	static func parsedAttachments(_ enclosures: [RSSEnclosure]?) -> Set<ParsedAttachment>? {
		guard let enclosures, !enclosures.isEmpty else {
			return nil
		}

		let converted = enclosures.compactMap { (enclosure) -> ParsedAttachment? in
			// A missing, zero, or negative length is passed on as "no size" (nil).
			let length = enclosure.length
			let sizeInBytes = (length ?? 0) > 0 ? length : nil
			return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
		}

		guard !converted.isEmpty else {
			return nil
		}
		return Set(converted)
	}
}

View File

@ -59,7 +59,6 @@ private extension RSSParser {
static let item = "item".utf8CString static let item = "item".utf8CString
static let guid = "guid".utf8CString static let guid = "guid".utf8CString
static let enclosure = "enclosure".utf8CString static let enclosure = "enclosure".utf8CString
static let rdfAbout = "rdf:about".utf8CString
static let image = "image".utf8CString static let image = "image".utf8CString
static let author = "author".utf8CString static let author = "author".utf8CString
static let rss = "rss".utf8CString static let rss = "rss".utf8CString
@ -291,6 +290,8 @@ private extension RSSParser {
extension RSSParser: SAXParserDelegate { extension RSSParser: SAXParserDelegate {
static let rdfAbout = "rdf:about"
public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) { public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
if endRSSFound { if endRSSFound {
@ -314,7 +315,7 @@ extension RSSParser: SAXParserDelegate {
addArticle() addArticle()
parsingArticle = true parsingArticle = true
if isRDF, let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid if isRDF, let rdfGuid = xmlAttributes?[Self.rdfAbout], let currentArticle { // RSS 1.0 guid
currentArticle.guid = rdfGuid currentArticle.guid = rdfGuid
currentArticle.permalink = rdfGuid currentArticle.permalink = rdfGuid
} }
@ -358,7 +359,7 @@ extension RSSParser: SAXParserDelegate {
} }
} }
else if !parsingChannelImage { else if !parsingChannelImage {
addFeedElement(localName, prefix) addFeedElement(saxParser, localName, prefix)
} }
} }