Continue progress on porting feed parsers.

This commit is contained in:
Brent Simmons 2024-09-09 20:54:42 -07:00
parent 77c1e26600
commit 24e7eb90f6
10 changed files with 789 additions and 629 deletions

View File

@ -6,6 +6,16 @@
<dict>
<key>DateParserTests</key>
<dict>
<key>testPubDateParsingPerformance()</key>
<dict>
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
<dict>
<key>baselineAverage</key>
<real>0.000131</real>
<key>baselineIntegrationDisplayName</key>
<string>Local Baseline</string>
</dict>
</dict>
<key>testW3CParsingPerformance()</key>
<dict>
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>

View File

@ -12,65 +12,65 @@ import SAX
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
// You don't need to know the type of feed.
public struct FeedParser {

    /// Reports whether `parserData` looks like a feed this parser can handle.
    ///
    /// - Parameter parserData: The data to inspect; its type is detected with `feedType(_:)`.
    /// - Returns: `true` for JSON Feed, RSS-in-JSON, RSS, and Atom data;
    ///   `false` when the detected type is `.unknown` or `.notAFeed`.
    public static func canParse(_ parserData: ParserData) -> Bool {
        // Every case is spelled out (no `default:`) so that a new FeedType case
        // becomes a compile error here instead of silently returning false.
        switch feedType(parserData) {
        case .jsonFeed, .rssInJSON, .rss, .atom:
            return true
        case .unknown, .notAFeed:
            return false
        }
    }

    /// Detects the feed type of `parserData` and parses it with the matching parser.
    ///
    /// - Parameter parserData: The data to parse.
    /// - Returns: The parsed feed, or `nil` when the detected type is `.unknown` or `.notAFeed`.
    /// - Throws: Any error thrown by `JSONFeedParser.parse(_:)` or `RSSInJSONParser.parse(_:)`.
    public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? {
        // Same dispatch as parseSync; delegating keeps the two entry points from drifting apart.
        return try parseSync(parserData)
    }

    /// For unit tests measuring performance.
    ///
    /// Synchronous counterpart of `parse(_:)`: detects the feed type and hands the data
    /// to the matching parser.
    ///
    /// - Parameter parserData: The data to parse.
    /// - Returns: The parsed feed, or `nil` when the detected type is `.unknown` or `.notAFeed`.
    /// - Throws: Any error thrown by `JSONFeedParser.parse(_:)` or `RSSInJSONParser.parse(_:)`.
    public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? {
        switch feedType(parserData) {
        case .jsonFeed:
            return try JSONFeedParser.parse(parserData)
        case .rssInJSON:
            return try RSSInJSONParser.parse(parserData)
        case .rss:
            return RSSParser.parse(parserData)
        case .atom:
            return AtomParser.parse(parserData)
        case .unknown, .notAFeed:
            return nil
        }
    }
}
//public struct FeedParser {
//
// public static func canParse(_ parserData: ParserData) -> Bool {
//
// let type = feedType(parserData)
//
// switch type {
// case .jsonFeed, .rssInJSON, .rss, .atom:
// return true
// default:
// return false
// }
// }
//
// public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? {
//
// let type = feedType(parserData)
//
// switch type {
//
// case .jsonFeed:
// return try JSONFeedParser.parse(parserData)
//
// case .rssInJSON:
// return try RSSInJSONParser.parse(parserData)
//
// case .rss:
// return RSSParser.parse(parserData)
//
// case .atom:
// return AtomParser.parse(parserData)
//
// case .unknown, .notAFeed:
// return nil
// }
// }
//
// /// For unit tests measuring performance.
// public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? {
//
// let type = feedType(parserData)
//
// switch type {
//
// case .jsonFeed:
// return try JSONFeedParser.parse(parserData)
//
// case .rssInJSON:
// return try RSSInJSONParser.parse(parserData)
//
// case .rss:
// return RSSParser.parse(parserData)
//
// case .atom:
// return AtomParser.parse(parserData)
//
// case .unknown, .notAFeed:
// return nil
// }
// }
//
//}

View File

@ -19,44 +19,44 @@ public enum FeedType: Sendable {
}
/// Below this many bytes `feedType(_:isPartialData:)` declines to guess and returns `.unknown`.
private let minNumberOfBytesRequired = 128

/// Guesses which kind of feed `parserData` contains.
///
/// Can be called with partial data while a download is still in progress:
/// - If there's not enough data yet, this returns `.unknown`. Ask again when there's more data.
/// - If it's definitely not a feed, this returns `.notAFeed`.
///
/// This is fast enough to call on the main thread.
///
/// - Parameters:
///   - parserData: The data to inspect.
///   - isPartialData: Pass `true` when `parserData` may not hold the complete download.
/// - Returns: The detected feed type. Checks run in this order: JSON Feed, RSS-in-JSON, RSS, Atom.
public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType {
    guard parserData.data.count >= minNumberOfBytesRequired else {
        return .unknown
    }

    let bytes = parserData.data as NSData

    if bytes.isProbablyJSONFeed() {
        return .jsonFeed
    } else if bytes.isProbablyRSSInJSON() {
        return .rssInJSON
    } else if bytes.isProbablyRSS() {
        return .rss
    } else if bytes.isProbablyAtom() {
        return .atom
    }

    // It might not be possible to detect a JSON Feed without all of the data.
    // Dr. Drang's JSON Feed (see allthis-partial.json in the tests) has, at this
    // writing, the JSON version element at the end of the feed, which is totally
    // legal, but it means the feed can't be recognized as a JSON Feed until
    // everything has arrived. So partial JSON yields .unknown instead of .notAFeed.
    let mightBeIncompleteJSONFeed = isPartialData && bytes.isProbablyJSON()
    return mightBeIncompleteJSONFeed ? .unknown : .notAFeed
}
//private let minNumberOfBytesRequired = 128
//
//public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType {
//
// // Can call with partial data while still downloading, for instance.
// // If theres not enough data, return .unknown. Ask again when theres more data.
// // If its definitely not a feed, return .notAFeed.
// //
// // This is fast enough to call on the main thread.
//
// if parserData.data.count < minNumberOfBytesRequired {
// return .unknown
// }
//
// let nsdata = parserData.data as NSData
//
// if nsdata.isProbablyJSONFeed() {
// return .jsonFeed
// }
// if nsdata.isProbablyRSSInJSON() {
// return .rssInJSON
// }
// if nsdata.isProbablyRSS() {
// return .rss
// }
// if nsdata.isProbablyAtom() {
// return .atom
// }
//
// if isPartialData && nsdata.isProbablyJSON() {
// // Might not be able to detect a JSON Feed without all data.
// // Dr. Drangs JSON Feed (see althis.json and allthis-partial.json in tests)
// // has, at this writing, the JSON version element at the end of the feed,
// // which is totally legal but it means not being able to detect
// // that its a JSON Feed without all the data.
// // So this returns .unknown instead of .notAFeed.
// return .unknown
// }
//
// return .notAFeed
//}

View File

@ -1,248 +1,248 @@
////
//// JSONFeedParser.swift
//// RSParser
////
//// Created by Brent Simmons on 6/25/17.
//// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
////
//
// JSONFeedParser.swift
// RSParser
//import Foundation
//import SAX
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//// See https://jsonfeed.org/version/1.1
//
import Foundation
import SAX
// See https://jsonfeed.org/version/1.1
public struct JSONFeedParser {

    /// JSON keys read from the feed, its items, authors, hubs, and attachments.
    struct Key {
        static let version = "version"
        static let items = "items"
        static let title = "title"
        static let homePageURL = "home_page_url"
        static let feedURL = "feed_url"
        static let feedDescription = "description"
        static let nextURL = "next_url"
        static let icon = "icon"
        static let favicon = "favicon"
        static let expired = "expired"
        static let author = "author"
        static let authors = "authors"
        static let name = "name"
        static let url = "url"
        static let avatar = "avatar"
        static let hubs = "hubs"
        static let type = "type"
        static let contentHTML = "content_html"
        static let contentText = "content_text"
        static let externalURL = "external_url"
        static let summary = "summary"
        static let image = "image"
        static let bannerImage = "banner_image"
        static let datePublished = "date_published"
        static let dateModified = "date_modified"
        static let tags = "tags"
        static let uniqueID = "id"
        static let attachments = "attachments"
        static let mimeType = "mime_type"
        static let sizeInBytes = "size_in_bytes"
        static let durationInSeconds = "duration_in_seconds"
        static let language = "language"
    }

    /// Substring that must appear in the feed's `version` value.
    /// The scheme is left off to allow for the mistake of not getting the scheme exactly correct.
    static let jsonFeedVersionMarker = "://jsonfeed.org/version/"

    /// Parses JSON Feed data into a `ParsedFeed`.
    ///
    /// - Parameter parserData: Supplies the raw JSON (`data`) and the URL the feed came from (`url`).
    /// - Returns: A `ParsedFeed` of type `.jsonFeed`.
    /// - Throws: `FeedParserError(.invalidJSON)` when the data can't be read as a JSON dictionary,
    ///   `FeedParserError(.jsonFeedVersionNotFound)` when `version` is missing or lacks the marker,
    ///   `FeedParserError(.jsonFeedItemsNotFound)` when `items` is missing,
    ///   `FeedParserError(.jsonFeedTitleNotFound)` when `title` is missing.
    public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
        guard let feedDictionary = JSONUtilities.dictionary(with: parserData.data) else {
            throw FeedParserError(.invalidJSON)
        }

        // Required elements. Each missing one has its own error.
        guard let version = feedDictionary[Key.version] as? String,
              version.range(of: jsonFeedVersionMarker) != nil else {
            throw FeedParserError(.jsonFeedVersionNotFound)
        }
        guard let itemsArray = feedDictionary[Key.items] as? JSONArray else {
            throw FeedParserError(.jsonFeedItemsNotFound)
        }
        guard let title = feedDictionary[Key.title] as? String else {
            throw FeedParserError(.jsonFeedTitleNotFound)
        }

        // Items are tagged with the URL the data was fetched from (parserData.url),
        // while the feed's own feedURL prefers the `feed_url` element when present.
        let items = parseItems(itemsArray, parserData.url)
        let feedURL = (feedDictionary[Key.feedURL] as? String) ?? parserData.url
        let expired = (feedDictionary[Key.expired] as? Bool) ?? false

        return ParsedFeed(
            type: .jsonFeed,
            title: title,
            homePageURL: feedDictionary[Key.homePageURL] as? String,
            feedURL: feedURL,
            language: feedDictionary[Key.language] as? String,
            feedDescription: feedDictionary[Key.feedDescription] as? String,
            nextURL: feedDictionary[Key.nextURL] as? String,
            iconURL: feedDictionary[Key.icon] as? String,
            faviconURL: feedDictionary[Key.favicon] as? String,
            authors: parseAuthors(feedDictionary),
            expired: expired,
            hubs: parseHubs(feedDictionary),
            items: items
        )
    }
}
private extension JSONFeedParser {

    /// Reads authors from `dictionary`.
    ///
    /// When an `authors` array is present it wins, and the result may be an empty set
    /// if none of its entries produce an author. Otherwise the single `author` object
    /// is used; the result is `nil` when that is missing or yields no author.
    static func parseAuthors(_ dictionary: JSONDictionary) -> Set<ParsedAuthor>? {
        if let authorsArray = dictionary[Key.authors] as? JSONArray {
            return Set(authorsArray.compactMap { parseAuthor($0) })
        }

        if let authorDictionary = dictionary[Key.author] as? JSONDictionary,
           let singleAuthor = parseAuthor(authorDictionary) {
            return Set([singleAuthor])
        }
        return nil
    }

    /// Builds one author from its JSON object, or returns `nil` when
    /// name, url, and avatar are all absent.
    static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? {
        let name = dictionary[Key.name] as? String
        let url = dictionary[Key.url] as? String
        let avatar = dictionary[Key.avatar] as? String

        guard name != nil || url != nil || avatar != nil else {
            return nil
        }
        return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil)
    }

    /// Reads the `hubs` array. Entries lacking a string `url` or `type` are skipped.
    /// Returns `nil` when the array is missing or no entry is usable.
    static func parseHubs(_ dictionary: JSONDictionary) -> Set<ParsedHub>? {
        guard let hubsArray = dictionary[Key.hubs] as? JSONArray else {
            return nil
        }

        let parsedHubs = hubsArray.compactMap { (hubDictionary) -> ParsedHub? in
            guard let hubURL = hubDictionary[Key.url] as? String,
                  let hubType = hubDictionary[Key.type] as? String else {
                return nil
            }
            return ParsedHub(type: hubType, url: hubURL)
        }
        if parsedHubs.isEmpty {
            return nil
        }
        return Set(parsedHubs)
    }

    /// Parses every entry of `itemsArray`, dropping the ones `parseItem` rejects.
    static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set<ParsedItem> {
        let parsedItems = itemsArray.compactMap { parseItem($0, feedURL) }
        return Set(parsedItems)
    }

    /// Builds one item from its JSON object.
    ///
    /// Returns `nil` when the item has no usable `id`, or when it has neither
    /// `content_html` nor `content_text`.
    static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {
        guard let uniqueID = parseUniqueID(itemDictionary) else {
            return nil
        }

        let contentHTML = itemDictionary[Key.contentHTML] as? String
        let contentText = itemDictionary[Key.contentText] as? String
        guard contentHTML != nil || contentText != nil else {
            return nil
        }

        let tags = (itemDictionary[Key.tags] as? [String]).map { Set($0) }

        return ParsedItem(
            syncServiceID: nil,
            uniqueID: uniqueID,
            feedURL: feedURL,
            url: itemDictionary[Key.url] as? String,
            externalURL: itemDictionary[Key.externalURL] as? String,
            title: parseTitle(itemDictionary, feedURL),
            language: itemDictionary[Key.language] as? String,
            contentHTML: contentHTML,
            contentText: contentText,
            summary: itemDictionary[Key.summary] as? String,
            imageURL: itemDictionary[Key.image] as? String,
            bannerImageURL: itemDictionary[Key.bannerImage] as? String,
            datePublished: parseDate(itemDictionary[Key.datePublished] as? String),
            dateModified: parseDate(itemDictionary[Key.dateModified] as? String),
            authors: parseAuthors(itemDictionary),
            tags: tags,
            attachments: parseAttachments(itemDictionary)
        )
    }

    /// Returns the item's title, decoding HTML entities for the feeds listed in
    /// `isSpecialCaseTitleWithEntitiesFeed(_:)`. Returns `nil` when there is no string title.
    static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? {
        guard let rawTitle = itemDictionary[Key.title] as? String else {
            return nil
        }
        guard isSpecialCaseTitleWithEntitiesFeed(feedURL) else {
            return rawTitle
        }
        return (rawTitle as NSString).rsparser_stringByDecodingHTMLEntities()
    }

    /// Reports whether `feedURL` (compared case-insensitively) belongs to one of the
    /// known feeds whose titles contain HTML entities.
    static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool {
        // As of 16 Feb. 2018, Kottke's and Heer's feeds include HTML entities in the title elements.
        // If we find more feeds like this, we'll add them here. If these feeds get fixed, we'll remove them.
        let knownHosts = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"]
        let lowercasedFeedURL = feedURL.lowercased()
        return knownHosts.contains { lowercasedFeedURL.contains($0) }
    }

    /// Returns the item's `id` as a string, or `nil` when it is missing or of another type.
    static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? {
        let rawID = itemDictionary[Key.uniqueID]

        // The spec says it must be a string.
        if let stringID = rawID as? String {
            return stringID
        }
        // Version 1 of the spec also says that if it's a number, even though that's
        // incorrect, it should be coerced to a string.
        if let intID = rawID as? Int {
            return "\(intID)"
        }
        if let doubleID = rawID as? Double {
            return "\(doubleID)"
        }
        return nil
    }

    /// Converts a date string with `RSDateWithString`; `nil` and empty strings yield `nil`.
    static func parseDate(_ dateString: String?) -> Date? {
        if let nonEmptyDateString = dateString, !nonEmptyDateString.isEmpty {
            return RSDateWithString(nonEmptyDateString)
        }
        return nil
    }

    /// Reads the `attachments` array, skipping entries `parseAttachment` rejects.
    /// Returns `nil` when the array is missing; the set may be empty otherwise.
    static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {
        guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else {
            return nil
        }
        let parsedAttachments = attachmentsArray.compactMap { (attachmentObject) -> ParsedAttachment? in
            return parseAttachment(attachmentObject)
        }
        return Set(parsedAttachments)
    }

    /// Builds one attachment from its JSON object. Both `url` and `mime_type`
    /// must be strings; otherwise the result is `nil`.
    static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? {
        guard let url = attachmentObject[Key.url] as? String,
              let mimeType = attachmentObject[Key.mimeType] as? String else {
            return nil
        }

        return ParsedAttachment(
            url: url,
            mimeType: mimeType,
            title: attachmentObject[Key.title] as? String,
            sizeInBytes: attachmentObject[Key.sizeInBytes] as? Int,
            durationInSeconds: attachmentObject[Key.durationInSeconds] as? Int
        )
    }
}
//public struct JSONFeedParser {
//
// struct Key {
// static let version = "version"
// static let items = "items"
// static let title = "title"
// static let homePageURL = "home_page_url"
// static let feedURL = "feed_url"
// static let feedDescription = "description"
// static let nextURL = "next_url"
// static let icon = "icon"
// static let favicon = "favicon"
// static let expired = "expired"
// static let author = "author"
// static let authors = "authors"
// static let name = "name"
// static let url = "url"
// static let avatar = "avatar"
// static let hubs = "hubs"
// static let type = "type"
// static let contentHTML = "content_html"
// static let contentText = "content_text"
// static let externalURL = "external_url"
// static let summary = "summary"
// static let image = "image"
// static let bannerImage = "banner_image"
// static let datePublished = "date_published"
// static let dateModified = "date_modified"
// static let tags = "tags"
// static let uniqueID = "id"
// static let attachments = "attachments"
// static let mimeType = "mime_type"
// static let sizeInBytes = "size_in_bytes"
// static let durationInSeconds = "duration_in_seconds"
// static let language = "language"
// }
//
// static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct.
//
// public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
//
// guard let d = JSONUtilities.dictionary(with: parserData.data) else {
// throw FeedParserError(.invalidJSON)
// }
//
// guard let version = d[Key.version] as? String, let _ = version.range(of: JSONFeedParser.jsonFeedVersionMarker) else {
// throw FeedParserError(.jsonFeedVersionNotFound)
// }
// guard let itemsArray = d[Key.items] as? JSONArray else {
// throw FeedParserError(.jsonFeedItemsNotFound)
// }
// guard let title = d[Key.title] as? String else {
// throw FeedParserError(.jsonFeedTitleNotFound)
// }
//
// let authors = parseAuthors(d)
// let homePageURL = d[Key.homePageURL] as? String
// let feedURL = d[Key.feedURL] as? String ?? parserData.url
// let feedDescription = d[Key.feedDescription] as? String
// let nextURL = d[Key.nextURL] as? String
// let iconURL = d[Key.icon] as? String
// let faviconURL = d[Key.favicon] as? String
// let expired = d[Key.expired] as? Bool ?? false
// let hubs = parseHubs(d)
// let language = d[Key.language] as? String
//
// let items = parseItems(itemsArray, parserData.url)
//
// return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items)
// }
//}
//
//private extension JSONFeedParser {
//
// static func parseAuthors(_ dictionary: JSONDictionary) -> Set<ParsedAuthor>? {
//
// if let authorsArray = dictionary[Key.authors] as? JSONArray {
// var authors = Set<ParsedAuthor>()
// for author in authorsArray {
// if let parsedAuthor = parseAuthor(author) {
// authors.insert(parsedAuthor)
// }
// }
// return authors
// }
//
// guard let authorDictionary = dictionary[Key.author] as? JSONDictionary,
// let parsedAuthor = parseAuthor(authorDictionary) else {
// return nil
// }
//
// return Set([parsedAuthor])
// }
//
// static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? {
// let name = dictionary[Key.name] as? String
// let url = dictionary[Key.url] as? String
// let avatar = dictionary[Key.avatar] as? String
// if name == nil && url == nil && avatar == nil {
// return nil
// }
// return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil)
// }
//
// static func parseHubs(_ dictionary: JSONDictionary) -> Set<ParsedHub>? {
//
// guard let hubsArray = dictionary[Key.hubs] as? JSONArray else {
// return nil
// }
//
// let hubs = hubsArray.compactMap { (hubDictionary) -> ParsedHub? in
// guard let hubURL = hubDictionary[Key.url] as? String, let hubType = hubDictionary[Key.type] as? String else {
// return nil
// }
// return ParsedHub(type: hubType, url: hubURL)
// }
// return hubs.isEmpty ? nil : Set(hubs)
// }
//
// static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set<ParsedItem> {
//
// return Set(itemsArray.compactMap { (oneItemDictionary) -> ParsedItem? in
// return parseItem(oneItemDictionary, feedURL)
// })
// }
//
// static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {
//
// guard let uniqueID = parseUniqueID(itemDictionary) else {
// return nil
// }
//
// let contentHTML = itemDictionary[Key.contentHTML] as? String
// let contentText = itemDictionary[Key.contentText] as? String
// if contentHTML == nil && contentText == nil {
// return nil
// }
//
// let url = itemDictionary[Key.url] as? String
// let externalURL = itemDictionary[Key.externalURL] as? String
// let title = parseTitle(itemDictionary, feedURL)
// let language = itemDictionary[Key.language] as? String
// let summary = itemDictionary[Key.summary] as? String
// let imageURL = itemDictionary[Key.image] as? String
// let bannerImageURL = itemDictionary[Key.bannerImage] as? String
//
// let datePublished = parseDate(itemDictionary[Key.datePublished] as? String)
// let dateModified = parseDate(itemDictionary[Key.dateModified] as? String)
//
// let authors = parseAuthors(itemDictionary)
// var tags: Set<String>? = nil
// if let tagsArray = itemDictionary[Key.tags] as? [String] {
// tags = Set(tagsArray)
// }
// let attachments = parseAttachments(itemDictionary)
//
// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments)
// }
//
// static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? {
//
// guard let title = itemDictionary[Key.title] as? String else {
// return nil
// }
//
// if isSpecialCaseTitleWithEntitiesFeed(feedURL) {
// return (title as NSString).rsparser_stringByDecodingHTMLEntities()
// }
//
// return title
// }
//
// static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool {
//
// // As of 16 Feb. 2018, Kottkes and Heers feeds includes HTML entities in the title elements.
// // If we find more feeds like this, well add them here. If these feeds get fixed, well remove them.
//
// let lowerFeedURL = feedURL.lowercased()
// let matchStrings = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"]
// for matchString in matchStrings {
// if lowerFeedURL.contains(matchString) {
// return true
// }
// }
//
// return false
// }
//
// static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? {
//
// if let uniqueID = itemDictionary[Key.uniqueID] as? String {
// return uniqueID // Spec says it must be a string
// }
// // Version 1 spec also says that if its a number, even though thats incorrect, it should be coerced to a string.
// if let uniqueID = itemDictionary[Key.uniqueID] as? Int {
// return "\(uniqueID)"
// }
// if let uniqueID = itemDictionary[Key.uniqueID] as? Double {
// return "\(uniqueID)"
// }
// return nil
// }
//
// static func parseDate(_ dateString: String?) -> Date? {
//
// guard let dateString = dateString, !dateString.isEmpty else {
// return nil
// }
// return RSDateWithString(dateString)
// }
//
// static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {
//
// guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else {
// return nil
// }
// return Set(attachmentsArray.compactMap { parseAttachment($0) })
// }
//
// static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? {
//
// guard let url = attachmentObject[Key.url] as? String else {
// return nil
// }
// guard let mimeType = attachmentObject[Key.mimeType] as? String else {
// return nil
// }
//
// let title = attachmentObject[Key.title] as? String
// let sizeInBytes = attachmentObject[Key.sizeInBytes] as? Int
// let durationInSeconds = attachmentObject[Key.durationInSeconds] as? Int
//
// return ParsedAttachment(url: url, mimeType: mimeType, title: title, sizeInBytes: sizeInBytes, durationInSeconds: durationInSeconds)
// }
//}

View File

@ -1,182 +1,182 @@
////
//// RSSInJSONParser.swift
//// RSParser
////
//// Created by Brent Simmons on 6/24/17.
//// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
////
//
// RSSInJSONParser.swift
// RSParser
//import Foundation
//import SAX
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
//// Also: http://cyber.harvard.edu/rss/rss.html
//
import Foundation
import SAX
// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
// Also: http://cyber.harvard.edu/rss/rss.html
public struct RSSInJSONParser {

    /// Parses RSS-in-JSON data into a `ParsedFeed`.
    ///
    /// - Parameter parserData: Supplies the raw JSON (`data`) and the feed's URL (`url`).
    /// - Returns: A `ParsedFeed` of type `.rssInJSON`.
    /// - Throws: Any error thrown by `JSONSerialization.jsonObject(with:)`;
    ///   `FeedParserError(.invalidJSON)` when the top level is not a JSON dictionary;
    ///   `FeedParserError(.rssChannelNotFound)` when `rss` or `rss.channel` is missing;
    ///   `FeedParserError(.rssItemsNotFound)` when no items array can be found.
    public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
        // Errors simply propagate to the caller; no do/catch is needed to rethrow them.
        guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else {
            throw FeedParserError(.invalidJSON)
        }
        guard let rssObject = parsedObject["rss"] as? JSONDictionary else {
            throw FeedParserError(.rssChannelNotFound)
        }
        guard let channelObject = rssObject["channel"] as? JSONDictionary else {
            throw FeedParserError(.rssChannelNotFound)
        }

        // I'd bet money that in practice the items array won't always appear correctly inside the channel object.
        // I'd also bet that sometimes it gets called "items" instead of "item".
        // Lookup order: channel "item", top-level "item", channel "items", top-level "items".
        guard let itemsObject = (channelObject["item"] as? JSONArray)
                ?? (parsedObject["item"] as? JSONArray)
                ?? (channelObject["items"] as? JSONArray)
                ?? (parsedObject["items"] as? JSONArray) else {
            throw FeedParserError(.rssItemsNotFound)
        }

        let title = channelObject["title"] as? String
        let homePageURL = channelObject["link"] as? String
        let feedURL = parserData.url
        let feedDescription = channelObject["description"] as? String
        let feedLanguage = channelObject["language"] as? String

        let items = parseItems(itemsObject, parserData.url)

        return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
    }
}
private extension RSSInJSONParser {

    /// Parses every entry of `itemsObject`, dropping the ones that yield no item.
    static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set<ParsedItem> {
        return Set(itemsObject.compactMap { (oneItemDictionary) -> ParsedItem? in
            return parsedItemWithDictionary(oneItemDictionary, feedURL)
        })
    }

    /// Builds one item from its JSON object.
    ///
    /// Returns `nil` when the item has neither a `description` nor a `title`,
    /// or when no unique ID could be determined.
    static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {
        let externalURL = itemDictionary["link"] as? String
        let title = itemDictionary["title"] as? String

        // A description with no "<" in it is treated as plain text rather than HTML.
        let itemDescription = itemDictionary["description"] as? String
        let descriptionIsHTML = itemDescription?.contains("<") ?? false
        let contentHTML = descriptionIsHTML ? itemDescription : nil
        let contentText = descriptionIsHTML ? nil : itemDescription

        if contentHTML == nil && contentText == nil && title == nil {
            return nil
        }

        var datePublished: Date? = nil
        if let datePublishedString = itemDictionary["pubDate"] as? String {
            datePublished = RSDateWithString(datePublishedString)
        }

        let authors = parseAuthors(itemDictionary)
        let tags = parseTags(itemDictionary)
        let attachments = parseAttachments(itemDictionary)

        // Prefer the feed-supplied guid; otherwise fall back to a calculated ID.
        let possibleUniqueID: String? = (itemDictionary["guid"] as? String)
            ?? calculatedUniqueID(datePublished: datePublished, title: title, externalURL: externalURL, authors: authors, attachments: attachments, contentHTML: contentHTML, contentText: contentText)
        guard let uniqueID = possibleUniqueID else {
            return nil
        }

        return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, language: nil, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments)
    }

    /// Calculates a unique ID for an item that has no `guid`.
    ///
    /// The ID is based on a combination of the non-empty elements, which is then hashed.
    /// Items should have guids. When they don't, re-runs are very likely
    /// because there's no other 100% reliable way to determine identity.
    /// This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.)
    static func calculatedUniqueID(datePublished: Date?, title: String?, externalURL: String?, authors: Set<ParsedAuthor>?, attachments: Set<ParsedAttachment>?, contentHTML: String?, contentText: String?) -> String? {
        var s = ""
        if let datePublished = datePublished {
            s += "\(datePublished.timeIntervalSince1970)"
        }
        if let title = title {
            s += title
        }
        if let externalURL = externalURL {
            s += externalURL
        }
        if let authorEmailAddress = authors?.first?.emailAddress {
            s += authorEmailAddress
        }
        if let oneAttachmentURL = attachments?.first?.url {
            s += oneAttachmentURL
        }
        if s.isEmpty {
            // Sheesh. Tough case. Only the content is left to go on; text wins over HTML.
            s = contentText ?? contentHTML ?? ""
        }
        return (s as NSString).rsparser_md5Hash()
    }

    /// Reads the item's `author` string as an email address.
    /// Returns `nil` when there is no string `author`.
    static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set<ParsedAuthor>? {
        guard let authorEmailAddress = itemDictionary["author"] as? String else {
            return nil
        }
        let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress)
        return Set([parsedAuthor])
    }

    /// Reads tags from `category`, which may be a single object or an array of objects,
    /// each carrying the tag in its `#value` string.
    ///
    /// Returns `nil` when `category` is missing, has another shape, or is a single object
    /// without a string `#value`. An array may yield an empty set.
    static func parseTags(_ itemDictionary: JSONDictionary) -> Set<String>? {
        if let categoryObject = itemDictionary["category"] as? JSONDictionary {
            guard let oneTag = categoryObject["#value"] as? String else {
                return nil
            }
            return Set([oneTag])
        }
        if let categoryArray = itemDictionary["category"] as? JSONArray {
            return Set(categoryArray.compactMap { $0["#value"] as? String })
        }
        return nil
    }

    /// Reads the item's single `enclosure` object as an attachment.
    ///
    /// Returns `nil` when the enclosure or its string `url` is missing, or when
    /// `ParsedAttachment` cannot be created from the values.
    static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {
        guard let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary,
              let attachmentURL = enclosureObject["url"] as? String else {
            return nil
        }

        // `length` may arrive as a number or as a string holding a number.
        let rawLength = enclosureObject["length"]
        let attachmentSize: Int? = (rawLength as? Int)
            ?? (rawLength as? String).map { ($0 as NSString).integerValue }

        let type = enclosureObject["type"] as? String
        guard let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) else {
            return nil
        }
        return Set([attachment])
    }
}
//public struct RSSInJSONParser {
//
// public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
//
// do {
// guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else {
// throw FeedParserError(.invalidJSON)
// }
// guard let rssObject = parsedObject["rss"] as? JSONDictionary else {
// throw FeedParserError(.rssChannelNotFound)
// }
// guard let channelObject = rssObject["channel"] as? JSONDictionary else {
// throw FeedParserError(.rssChannelNotFound)
// }
//
// // Id bet money that in practice the items array wont always appear correctly inside the channel object.
// // Id also bet that sometimes it gets called "items" instead of "item".
// var itemsObject = channelObject["item"] as? JSONArray
// if itemsObject == nil {
// itemsObject = parsedObject["item"] as? JSONArray
// }
// if itemsObject == nil {
// itemsObject = channelObject["items"] as? JSONArray
// }
// if itemsObject == nil {
// itemsObject = parsedObject["items"] as? JSONArray
// }
// if itemsObject == nil {
// throw FeedParserError(.rssItemsNotFound)
// }
//
// let title = channelObject["title"] as? String
// let homePageURL = channelObject["link"] as? String
// let feedURL = parserData.url
// let feedDescription = channelObject["description"] as? String
// let feedLanguage = channelObject["language"] as? String
//
// let items = parseItems(itemsObject!, parserData.url)
//
// return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
//
// }
// catch { throw error }
// }
//}
//
//private extension RSSInJSONParser {
//
// static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set<ParsedItem> {
//
// return Set(itemsObject.compactMap{ (oneItemDictionary) -> ParsedItem? in
//
// return parsedItemWithDictionary(oneItemDictionary, feedURL)
// })
// }
//
// static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {
//
// let externalURL = itemDictionary["link"] as? String
// let title = itemDictionary["title"] as? String
//
// var contentHTML = itemDictionary["description"] as? String
// var contentText: String? = nil
// if contentHTML != nil && !(contentHTML!.contains("<")) {
// contentText = contentHTML
// contentHTML = nil
// }
// if contentHTML == nil && contentText == nil && title == nil {
// return nil
// }
//
// var datePublished: Date? = nil
// if let datePublishedString = itemDictionary["pubDate"] as? String {
// datePublished = RSDateWithString(datePublishedString)
// }
//
// let authors = parseAuthors(itemDictionary)
// let tags = parseTags(itemDictionary)
// let attachments = parseAttachments(itemDictionary)
//
// var uniqueID: String? = itemDictionary["guid"] as? String
// if uniqueID == nil {
//
// // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result.
// // Items should have guids. When they don't, re-runs are very likely
// // because there's no other 100% reliable way to determine identity.
// // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.)
//
// var s = ""
// if let datePublished = datePublished {
// s += "\(datePublished.timeIntervalSince1970)"
// }
// if let title = title {
// s += title
// }
// if let externalURL = externalURL {
// s += externalURL
// }
// if let authorEmailAddress = authors?.first?.emailAddress {
// s += authorEmailAddress
// }
// if let oneAttachmentURL = attachments?.first?.url {
// s += oneAttachmentURL
// }
// if s.isEmpty {
// // Sheesh. Tough case.
// if let _ = contentHTML {
// s = contentHTML!
// }
// if let _ = contentText {
// s = contentText!
// }
// }
// uniqueID = (s as NSString).rsparser_md5Hash()
// }
//
// if let uniqueID = uniqueID {
// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, language: nil, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments)
// }
// return nil
// }
//
// static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set<ParsedAuthor>? {
//
// guard let authorEmailAddress = itemDictionary["author"] as? String else {
// return nil
// }
// let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress)
// return Set([parsedAuthor])
// }
//
// static func parseTags(_ itemDictionary: JSONDictionary) -> Set<String>? {
//
// if let categoryObject = itemDictionary["category"] as? JSONDictionary {
// if let oneTag = categoryObject["#value"] as? String {
// return Set([oneTag])
// }
// return nil
// }
// else if let categoryArray = itemDictionary["category"] as? JSONArray {
// return Set(categoryArray.compactMap{ $0["#value"] as? String })
// }
// return nil
// }
//
// static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {
//
// guard let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary else {
// return nil
// }
// guard let attachmentURL = enclosureObject["url"] as? String else {
// return nil
// }
//
// var attachmentSize = enclosureObject["length"] as? Int
// if attachmentSize == nil {
// if let attachmentSizeString = enclosureObject["length"] as? String {
// attachmentSize = (attachmentSizeString as NSString).integerValue
// }
// }
//
// let type = enclosureObject["type"] as? String
// if let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) {
// return Set([attachment])
// }
// return nil
// }
//}

View File

@ -63,5 +63,10 @@ public final class ParsedItem: Hashable, Sendable {
hasher.combine(feedURL)
}
}
/// Field-by-field equality for `ParsedItem`.
///
/// Two items compare equal only when every property listed below matches;
/// a difference in any single field makes the items unequal.
///
/// - Parameters:
///   - lhs: The first item to compare.
///   - rhs: The second item to compare.
/// - Returns: `true` when all compared properties are equal.
public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool {
	lhs.syncServiceID == rhs.syncServiceID
		&& lhs.uniqueID == rhs.uniqueID
		&& lhs.feedURL == rhs.feedURL
		&& lhs.url == rhs.url
		&& lhs.externalURL == rhs.externalURL
		&& lhs.title == rhs.title
		// The `&&` joining `title` and `language` was missing, which made the expression invalid.
		&& lhs.language == rhs.language
		&& lhs.contentHTML == rhs.contentHTML
		&& lhs.contentText == rhs.contentText
		&& lhs.summary == rhs.summary
		&& lhs.imageURL == rhs.imageURL
		&& lhs.bannerImageURL == rhs.bannerImageURL
		&& lhs.datePublished == rhs.datePublished
		&& lhs.dateModified == rhs.dateModified
		&& lhs.authors == rhs.authors
		&& lhs.tags == rhs.tags
		&& lhs.attachments == rhs.attachments
}
}

View File

@ -17,13 +17,13 @@ import SAX
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.
public struct AtomParser {

	/// Parses `parserData` as an Atom feed.
	///
	/// - Parameter parserData: The data handed to `RSAtomParser.parseFeed(with:)`.
	/// - Returns: The feed converted by `RSParsedFeedTransformer`, or `nil`
	///   when `RSAtomParser` produces no feed.
	public static func parse(_ parserData: ParserData) -> ParsedFeed? {
		guard let atomFeed = RSAtomParser.parseFeed(with: parserData) else {
			return nil
		}
		return RSParsedFeedTransformer.parsedFeed(atomFeed)
	}
}
//public struct AtomParser {
//
// public static func parse(_ parserData: ParserData) -> ParsedFeed? {
//
// if let rsParsedFeed = RSAtomParser.parseFeed(with: parserData) {
// return RSParsedFeedTransformer.parsedFeed(rsParsedFeed)
// }
// return nil
// }
//}

View File

@ -13,65 +13,65 @@ import Foundation
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
// which is part of providing a single API for feed parsing.
struct RSParsedFeedTransformer {

	/// Builds a `ParsedFeed` from an `RSParsedFeed`.
	///
	/// Title, link, URL string and language are copied from `rsParsedFeed`;
	/// its articles are converted with `parsedItems(_:)`. The type is always
	/// reported as `.rss`, and the remaining fields are `nil` / `false`.
	static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {
		ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: parsedItems(rsParsedFeed.articles))
	}
}
private extension RSParsedFeedTransformer {

	/// Converts every `RSParsedArticle` in the set into a `ParsedItem`.
	static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> Set<ParsedItem> {
		Set(parsedArticles.map { parsedItem($0) })
	}

	/// Converts one `RSParsedArticle` into a `ParsedItem`.
	///
	/// The article's `permalink` becomes `url`, its `link` becomes `externalURL`,
	/// and its `body` becomes `contentHTML`. Fields with no counterpart read here
	/// (sync service ID, content text, summary, images, tags) are passed as `nil`.
	static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {
		ParsedItem(syncServiceID: nil, uniqueID: parsedArticle.articleID, feedURL: parsedArticle.feedURL, url: parsedArticle.permalink, externalURL: parsedArticle.link, title: parsedArticle.title, language: parsedArticle.language, contentHTML: parsedArticle.body, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: parsedArticle.datePublished, dateModified: parsedArticle.dateModified, authors: parsedAuthors(parsedArticle.authors), tags: nil, attachments: parsedAttachments(parsedArticle.enclosures))
	}

	/// Converts a set of `RSParsedAuthor` into a set of `ParsedAuthor`.
	///
	/// - Returns: `nil` when the input is `nil` or empty, or when no author
	///   survives the conversion.
	static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {
		guard let authors, !authors.isEmpty else {
			return nil
		}

		let converted = authors.compactMap { (rsAuthor) -> ParsedAuthor? in
			ParsedAuthor(name: rsAuthor.name, url: rsAuthor.url, avatarURL: nil, emailAddress: rsAuthor.emailAddress)
		}
		if converted.isEmpty {
			return nil
		}
		return Set(converted)
	}

	/// Converts a set of `RSParsedEnclosure` into a set of `ParsedAttachment`.
	///
	/// An enclosure length that is not greater than zero is passed on as `nil`.
	///
	/// - Returns: `nil` when the input is `nil` or empty, or when no attachment
	///   could be created.
	static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {
		guard let enclosures, !enclosures.isEmpty else {
			return nil
		}

		let converted = enclosures.compactMap { (rsEnclosure) -> ParsedAttachment? in
			let sizeInBytes = rsEnclosure.length > 0 ? rsEnclosure.length : nil
			return ParsedAttachment(url: rsEnclosure.url, mimeType: rsEnclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
		}
		if converted.isEmpty {
			return nil
		}
		return Set(converted)
	}
}
//struct RSParsedFeedTransformer {
//
// static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {
//
// let items = parsedItems(rsParsedFeed.articles)
// return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
// }
//}
//
//private extension RSParsedFeedTransformer {
//
// static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> Set<ParsedItem> {
//
// // Create Set<ParsedItem> from Set<RSParsedArticle>
//
// return Set(parsedArticles.map(parsedItem))
// }
//
// static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {
//
// let uniqueID = parsedArticle.articleID
// let url = parsedArticle.permalink
// let externalURL = parsedArticle.link
// let title = parsedArticle.title
// let language = parsedArticle.language
// let contentHTML = parsedArticle.body
// let datePublished = parsedArticle.datePublished
// let dateModified = parsedArticle.dateModified
// let authors = parsedAuthors(parsedArticle.authors)
// let attachments = parsedAttachments(parsedArticle.enclosures)
//
// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments)
// }
//
// static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {
//
// guard let authors = authors, !authors.isEmpty else {
// return nil
// }
//
// let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in
// return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
// }
//
// return transformedAuthors.isEmpty ? nil : Set(transformedAuthors)
// }
//
// static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {
//
// guard let enclosures = enclosures, !enclosures.isEmpty else {
// return nil
// }
//
// let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in
//
// let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil
// return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
// }
//
// return attachments.isEmpty ? nil : Set(attachments)
// }
//}

View File

@ -33,7 +33,7 @@ public final class RSSParser {
private var parsingAuthor = false
private var currentAttributes: SAXParser.XMLAttributesDictionary?
public static func parsedFeed(with parserData: ParserData) -> RSSFeed {
static func parsedFeed(with parserData: ParserData) -> RSSFeed {
let parser = RSSParser(parserData)
parser.parse()
@ -48,6 +48,12 @@ public final class RSSParser {
private extension RSSParser {
/// Creates a `SAXParser` over `data` with this object as its delegate and runs it.
func parse() {
	SAXParser(delegate: self, data: data).parse()
}
private struct XMLName {
static let uppercaseRDF = "RDF".utf8CString
static let item = "item".utf8CString
@ -63,9 +69,13 @@ private extension RSSParser {
static let dc = "dc".utf8CString
static let content = "content".utf8CString
static let encoded = "encoded".utf8CString
static let creator = "creator".utf8CString
static let date = "date".utf8CString
static let pubDate = "pubDate".utf8CString
static let description = "description".utf8CString
}
func addFeedElement(_ localName: XMLPointer, _ prefix: XMLPointer?) {
func addFeedElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {
guard prefix == nil else {
return
@ -73,14 +83,14 @@ private extension RSSParser {
if SAXEqualTags(localName, XMLName.link) {
if feed.link == nil {
feed.link = currentString
feed.link = saxParser.currentString
}
}
else if SAXEqualTags(localName, XMLName.title) {
feed.title = currentString
feed.title = saxParser.currentString
}
else if SAXEqualTags(localName, XMLName.language) {
feed.language = currentString
feed.language = saxParser.currentString
}
}
@ -91,13 +101,17 @@ private extension RSSParser {
func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {
if SAXEqualTags(prefix, XMLName.dc) {
addDCElement(localName)
return;
guard let currentArticle else {
return
}
if SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) {
if let currentString, !currentString.isEmpty {
if let prefix, SAXEqualTags(prefix, XMLName.dc) {
addDCElement(saxParser, localName, currentArticle)
return
}
if let prefix, SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) {
if let currentString = saxParser.currentString, !currentString.isEmpty {
currentArticle.body = currentString
}
return
@ -107,40 +121,171 @@ private extension RSSParser {
return
}
if SAXEqualTags(localName, XMLName.guid) {
addGuid()
if let currentString = saxParser.currentString {
if SAXEqualTags(localName, XMLName.guid) {
addGuid(currentString, currentArticle)
}
else if SAXEqualTags(localName, XMLName.author) {
addAuthorWithString(currentString, currentArticle)
}
else if SAXEqualTags(localName, XMLName.link) {
currentArticle.link = urlString(currentString)
}
else if SAXEqualTags(localName, XMLName.description) {
if currentArticle.body == nil {
currentArticle.body = currentString
}
}
else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) {
currentArticle.title = currentString
}
}
else if SAXEqualTags(localName, XMLName.pubDate) {
currentArticle.datePublished = currentDate(saxParser)
}
else if SAXEqualTags(localName, XMLName.author) {
addAuthorWithString(currentString)
else if SAXEqualTags(localName, XMLName.enclosure), let currentAttributes {
addEnclosure(currentAttributes, currentArticle)
}
else if SAXEqualTags(localName, XMLName.link) {
currentArticle.link = urlString(currentString)
}
else if SAXEqualTags(localName, XMLName.description) {
if currentArticle.body == nil {
currentArticle.body = currentString
}
func addDCElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ currentArticle: RSSArticle) {
if SAXEqualTags(localName, XMLName.creator) {
if let currentString = saxParser.currentString {
addAuthorWithString(currentString, currentArticle)
}
}
else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) {
if let currentString {
currentArticle.title = currentString
else if SAXEqualTags(localName, XMLName.date) {
currentArticle.datePublished = currentDate(saxParser)
}
}
static let isPermalinkKey = "isPermaLink"
static let isPermalinkLowercaseKey = "ispermalink"
static let falseValue = "false"
func addGuid(_ guid: String, _ currentArticle: RSSArticle) {
currentArticle.guid = guid
guard let currentAttributes else {
return
}
let isPermaLinkValue: String? = {
if let value = currentAttributes[Self.isPermalinkKey] {
return value
}
// Allow for `ispermalink`, `isPermalink`, etc.
for (key, value) in currentAttributes {
if key.lowercased() == Self.isPermalinkLowercaseKey {
return value
}
}
return nil
}()
// Spec: `isPermaLink is optional, its default value is true.`
// https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
// Return only if non-nil and equal to false — otherwise it's a permalink.
if let isPermaLinkValue, isPermaLinkValue == Self.falseValue {
return
}
else if SAXEqualTags(localName, XMLName.enclosure) {
addEnclosure()
// Feed bug found in the wild: using a guid that's not really a permalink
// and not realizing that `isPermaLink` is true by default.
if stringIsProbablyAURLOrRelativePath(guid) {
currentArticle.permalink = urlString(guid)
}
}
/// Heuristic check for whether a guid value looks like a URL or a relative path.
///
/// The RSS guid is defined as a permalink, except when it appears like this:
/// `<guid isPermaLink="false">someidentifier</guid>`
/// However, people often seem to think it's *not* a permalink by default, even
/// though it is. So we try to detect the situation where the value is not a URL string,
/// and not even a relative path. This may need to evolve over time.
///
/// - Parameter s: The guid string to inspect.
/// - Returns: `false` when `s` contains no `/` or starts with `tag:`
///   (compared case-insensitively); `true` otherwise.
func stringIsProbablyAURLOrRelativePath(_ s: String) -> Bool {

	// Requiring a slash seems to be just about the best possible check.
	// Bad guids are often just integers, for instance.
	guard s.contains("/") else {
		return false
	}

	// A common non-URL guid form starts with `tag:`.
	return !s.lowercased().hasPrefix("tag:")
}
/// Do best attempt at turning a string into a URL string.
///
/// If it already appears to be a URL (it starts with `http`, compared
/// case-insensitively), return it unchanged.
/// Otherwise, treat it like a relative URL and resolve using
/// the URL of the home page of the feed (if available)
/// or the URL of the feed.
///
/// The returned value is not guaranteed to be a valid URL string.
/// It's a best attempt without going to heroic lengths.
///
/// - Parameter s: The candidate URL string or relative path.
/// - Returns: `s` itself when it already looks absolute or cannot be resolved;
///   otherwise the absolute string of the resolved URL.
func urlString(_ s: String) -> String {

	let looksAbsolute = s.lowercased().hasPrefix("http")
	if looksAbsolute {
		return s
	}

	// Prefer the feed's home page as the base; fall back to the feed URL.
	guard let baseURL = URL(string: feed.link ?? feedURL),
		  let resolved = URL(string: s, relativeTo: baseURL) else {
		return s
	}

	return resolved.absoluteString
}
/// Adds an author built from `authorString` to `currentArticle`.
///
/// Does nothing when `authorString` is empty.
func addAuthorWithString(_ authorString: String, _ currentArticle: RSSArticle) {

	guard !authorString.isEmpty else {
		return
	}

	currentArticle.addAuthor(RSSAuthor(singleString: authorString))
}
/// Keys that `addEnclosure(_:_:)` uses to look up values in the attributes dictionary.
private struct EnclosureKey {
// Required: `addEnclosure` returns early when this value is missing or empty.
static let url = "url"
// Optional: only applied when the value parses as an `Int`.
static let length = "length"
// Optional: assigned to the enclosure's `mimeType` as-is.
static let type = "type"
}
/// Builds an `RSSEnclosure` from `attributes` and adds it to `currentArticle`.
///
/// Nothing is added when the `url` attribute is missing or empty. The `length`
/// attribute is applied only when it parses as an `Int`; `type` is copied to
/// `mimeType` whether or not it is present.
func addEnclosure(_ attributes: SAXParser.XMLAttributesDictionary, _ currentArticle: RSSArticle) {

	guard let enclosureURL = attributes[EnclosureKey.url], !enclosureURL.isEmpty else {
		return
	}

	let enclosure = RSSEnclosure(url: enclosureURL)

	let parsedLength = attributes[EnclosureKey.length].flatMap { Int($0) }
	if let parsedLength {
		enclosure.length = parsedLength
	}
	enclosure.mimeType = attributes[EnclosureKey.type]

	currentArticle.addEnclosure(enclosure)
}
/// Parses the SAX parser's current characters as a date.
///
/// - Returns: `nil` when `saxParser.currentCharacters` is `nil`; otherwise
///   whatever `DateParser.date(data:)` returns for those characters.
func currentDate(_ saxParser: SAXParser) -> Date? {
	saxParser.currentCharacters.flatMap { characters in
		DateParser.date(data: characters)
	}
}
}
@ -157,8 +302,8 @@ extension RSSParser: SAXParserDelegate {
return
}
var xmlAttributes: XMLAttributesDictionary? = nil
if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(enclosure, XMLName.enclosure) {
var xmlAttributes: SAXParser.XMLAttributesDictionary? = nil
if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(localName, XMLName.enclosure) {
xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount)
}
if currentAttributes != xmlAttributes {
@ -169,7 +314,7 @@ extension RSSParser: SAXParserDelegate {
addArticle()
parsingArticle = true
if isRDF && let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid
if isRDF, let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid
currentArticle.guid = rdfGuid
currentArticle.permalink = rdfGuid
}

View File

@ -1,5 +1,5 @@
//
// File.swift
// SAXUtilities.swift
//
//
// Created by Brent Simmons on 8/26/24.