Continue progress on porting feed parsers.
This commit is contained in:
parent
24e7eb90f6
commit
eeb27475de
@ -335,7 +335,7 @@ private extension DateParser {
|
|||||||
timeInfo.tm_gmtoff = 0;
|
timeInfo.tm_gmtoff = 0;
|
||||||
timeInfo.tm_zone = nil;
|
timeInfo.tm_zone = nil;
|
||||||
|
|
||||||
var rawTime = timegm(&timeInfo) - timeZoneOffset
|
let rawTime = timegm(&timeInfo) - timeZoneOffset
|
||||||
if rawTime == time_t(UInt32.max) {
|
if rawTime == time_t(UInt32.max) {
|
||||||
|
|
||||||
// NSCalendar is super-amazingly slow (which is partly why this parser exists),
|
// NSCalendar is super-amazingly slow (which is partly why this parser exists),
|
||||||
@ -363,7 +363,7 @@ private extension DateParser {
|
|||||||
timeInterval += TimeInterval(TimeInterval(milliseconds) / 1000.0)
|
timeInterval += TimeInterval(TimeInterval(milliseconds) / 1000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
return Date(timeIntervalSince1970: TimeInterval(timeInterval))
|
return Date(timeIntervalSince1970: timeInterval)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Time Zones and Offsets
|
// MARK: - Time Zones and Offsets
|
||||||
|
@ -12,8 +12,8 @@ import SAX
|
|||||||
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
|
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
|
||||||
// You don’t need to know the type of feed.
|
// You don’t need to know the type of feed.
|
||||||
|
|
||||||
//public struct FeedParser {
|
public struct FeedParser {
|
||||||
//
|
|
||||||
// public static func canParse(_ parserData: ParserData) -> Bool {
|
// public static func canParse(_ parserData: ParserData) -> Bool {
|
||||||
//
|
//
|
||||||
// let type = feedType(parserData)
|
// let type = feedType(parserData)
|
||||||
@ -25,9 +25,13 @@ import SAX
|
|||||||
// return false
|
// return false
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
//
|
|
||||||
// public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? {
|
public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
|
||||||
//
|
|
||||||
|
let rssFeed = RSSParser.parsedFeed(with: parserData)
|
||||||
|
let parsedFeed = RSSFeedTransformer.parsedFeed(with: rssFeed)
|
||||||
|
|
||||||
|
return parsedFeed
|
||||||
// let type = feedType(parserData)
|
// let type = feedType(parserData)
|
||||||
//
|
//
|
||||||
// switch type {
|
// switch type {
|
||||||
@ -47,8 +51,8 @@ import SAX
|
|||||||
// case .unknown, .notAFeed:
|
// case .unknown, .notAFeed:
|
||||||
// return nil
|
// return nil
|
||||||
// }
|
// }
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// /// For unit tests measuring performance.
|
// /// For unit tests measuring performance.
|
||||||
// public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? {
|
// public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? {
|
||||||
//
|
//
|
||||||
@ -72,5 +76,5 @@ import SAX
|
|||||||
// return nil
|
// return nil
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
//
|
|
||||||
//}
|
}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
import Foundation
|
import Foundation
|
||||||
|
|
||||||
public class ParsedFeed: Sendable {
|
public final class ParsedFeed: Sendable {
|
||||||
|
|
||||||
public let type: FeedType
|
public let type: FeedType
|
||||||
public let title: String?
|
public let title: String?
|
||||||
|
@ -66,7 +66,7 @@ public final class ParsedItem: Hashable, Sendable {
|
|||||||
|
|
||||||
public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool {
|
public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool {
|
||||||
|
|
||||||
lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments
|
lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title && lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,77 +0,0 @@
|
|||||||
//
|
|
||||||
// RSParsedFeedTransformer.swift
|
|
||||||
// RSParser
|
|
||||||
//
|
|
||||||
// Created by Brent Simmons on 6/25/17.
|
|
||||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
|
||||||
//
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
|
|
||||||
// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
|
|
||||||
// They create an RSParsedFeed object and related Objective-C objects.
|
|
||||||
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
|
|
||||||
// which is part of providing a single API for feed parsing.
|
|
||||||
|
|
||||||
//struct RSParsedFeedTransformer {
|
|
||||||
//
|
|
||||||
// static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {
|
|
||||||
//
|
|
||||||
// let items = parsedItems(rsParsedFeed.articles)
|
|
||||||
// return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
//
|
|
||||||
//private extension RSParsedFeedTransformer {
|
|
||||||
//
|
|
||||||
// static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> Set<ParsedItem> {
|
|
||||||
//
|
|
||||||
// // Create Set<ParsedItem> from Set<RSParsedArticle>
|
|
||||||
//
|
|
||||||
// return Set(parsedArticles.map(parsedItem))
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {
|
|
||||||
//
|
|
||||||
// let uniqueID = parsedArticle.articleID
|
|
||||||
// let url = parsedArticle.permalink
|
|
||||||
// let externalURL = parsedArticle.link
|
|
||||||
// let title = parsedArticle.title
|
|
||||||
// let language = parsedArticle.language
|
|
||||||
// let contentHTML = parsedArticle.body
|
|
||||||
// let datePublished = parsedArticle.datePublished
|
|
||||||
// let dateModified = parsedArticle.dateModified
|
|
||||||
// let authors = parsedAuthors(parsedArticle.authors)
|
|
||||||
// let attachments = parsedAttachments(parsedArticle.enclosures)
|
|
||||||
//
|
|
||||||
// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments)
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {
|
|
||||||
//
|
|
||||||
// guard let authors = authors, !authors.isEmpty else {
|
|
||||||
// return nil
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in
|
|
||||||
// return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return transformedAuthors.isEmpty ? nil : Set(transformedAuthors)
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {
|
|
||||||
//
|
|
||||||
// guard let enclosures = enclosures, !enclosures.isEmpty else {
|
|
||||||
// return nil
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in
|
|
||||||
//
|
|
||||||
// let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil
|
|
||||||
// return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return attachments.isEmpty ? nil : Set(attachments)
|
|
||||||
// }
|
|
||||||
//}
|
|
@ -0,0 +1,75 @@
|
|||||||
|
//
|
||||||
|
// RSSFeedTransformer.swift
|
||||||
|
// RSParser
|
||||||
|
//
|
||||||
|
// Created by Brent Simmons on 6/25/17.
|
||||||
|
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
struct RSSFeedTransformer {
|
||||||
|
|
||||||
|
/// Turn an internal RSSFeed into a public ParsedFeed.
|
||||||
|
static func parsedFeed(with rssFeed: RSSFeed) -> ParsedFeed {
|
||||||
|
|
||||||
|
let items = parsedItems(rssFeed.articles)
|
||||||
|
return ParsedFeed(type: .rss, title: rssFeed.title, homePageURL: rssFeed.link, feedURL: rssFeed.urlString, language: rssFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private extension RSSFeedTransformer {
|
||||||
|
|
||||||
|
static func parsedItems(_ articles: [RSSArticle]?) -> Set<ParsedItem> {
|
||||||
|
|
||||||
|
guard let articles else {
|
||||||
|
return Set<ParsedItem>()
|
||||||
|
}
|
||||||
|
|
||||||
|
return Set(articles.map(parsedItem))
|
||||||
|
}
|
||||||
|
|
||||||
|
static func parsedItem(_ article: RSSArticle) -> ParsedItem {
|
||||||
|
|
||||||
|
let uniqueID = article.articleID
|
||||||
|
let url = article.permalink
|
||||||
|
let externalURL = article.link
|
||||||
|
let title = article.title
|
||||||
|
let language = article.language
|
||||||
|
let contentHTML = article.body
|
||||||
|
let datePublished = article.datePublished
|
||||||
|
let dateModified = article.dateModified
|
||||||
|
let authors = parsedAuthors(article.authors)
|
||||||
|
let attachments = parsedAttachments(article.enclosures)
|
||||||
|
|
||||||
|
return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: article.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments)
|
||||||
|
}
|
||||||
|
|
||||||
|
static func parsedAuthors(_ authors: [RSSAuthor]?) -> Set<ParsedAuthor>? {
|
||||||
|
|
||||||
|
guard let authors = authors, !authors.isEmpty else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in
|
||||||
|
return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
|
||||||
|
}
|
||||||
|
|
||||||
|
return transformedAuthors.isEmpty ? nil : Set(transformedAuthors)
|
||||||
|
}
|
||||||
|
|
||||||
|
static func parsedAttachments(_ enclosures: [RSSEnclosure]?) -> Set<ParsedAttachment>? {
|
||||||
|
|
||||||
|
guard let enclosures = enclosures, !enclosures.isEmpty else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in
|
||||||
|
|
||||||
|
let sizeInBytes = (enclosure.length ?? 0) > 0 ? enclosure.length : nil
|
||||||
|
return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
return attachments.isEmpty ? nil : Set(attachments)
|
||||||
|
}
|
||||||
|
}
|
@ -59,7 +59,6 @@ private extension RSSParser {
|
|||||||
static let item = "item".utf8CString
|
static let item = "item".utf8CString
|
||||||
static let guid = "guid".utf8CString
|
static let guid = "guid".utf8CString
|
||||||
static let enclosure = "enclosure".utf8CString
|
static let enclosure = "enclosure".utf8CString
|
||||||
static let rdfAbout = "rdf:about".utf8CString
|
|
||||||
static let image = "image".utf8CString
|
static let image = "image".utf8CString
|
||||||
static let author = "author".utf8CString
|
static let author = "author".utf8CString
|
||||||
static let rss = "rss".utf8CString
|
static let rss = "rss".utf8CString
|
||||||
@ -291,6 +290,8 @@ private extension RSSParser {
|
|||||||
|
|
||||||
extension RSSParser: SAXParserDelegate {
|
extension RSSParser: SAXParserDelegate {
|
||||||
|
|
||||||
|
static let rdfAbout = "rdf:about"
|
||||||
|
|
||||||
public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
|
public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
|
||||||
|
|
||||||
if endRSSFound {
|
if endRSSFound {
|
||||||
@ -314,7 +315,7 @@ extension RSSParser: SAXParserDelegate {
|
|||||||
addArticle()
|
addArticle()
|
||||||
parsingArticle = true
|
parsingArticle = true
|
||||||
|
|
||||||
if isRDF, let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid
|
if isRDF, let rdfGuid = xmlAttributes?[Self.rdfAbout], let currentArticle { // RSS 1.0 guid
|
||||||
currentArticle.guid = rdfGuid
|
currentArticle.guid = rdfGuid
|
||||||
currentArticle.permalink = rdfGuid
|
currentArticle.permalink = rdfGuid
|
||||||
}
|
}
|
||||||
@ -358,7 +359,7 @@ extension RSSParser: SAXParserDelegate {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if !parsingChannelImage {
|
else if !parsingChannelImage {
|
||||||
addFeedElement(localName, prefix)
|
addFeedElement(saxParser, localName, prefix)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user