diff --git a/Mac/AppDelegate.swift b/Mac/AppDelegate.swift index d10af9671..22d92cc98 100644 --- a/Mac/AppDelegate.swift +++ b/Mac/AppDelegate.swift @@ -17,6 +17,7 @@ import OSLog import Core import CrashReporter import Images +import libxml2 // If we're not going to import Sparkle, provide dummy protocols to make it easy // for AppDelegate to comply @@ -104,6 +105,8 @@ import Sparkle override init() { + xmlInitParser() + NSWindow.allowsAutomaticWindowTabbing = false super.init() diff --git a/Modules/Account/Package.swift b/Modules/Account/Package.swift index 48ee3e33e..539cb3ea5 100644 --- a/Modules/Account/Package.swift +++ b/Modules/Account/Package.swift @@ -12,7 +12,6 @@ let package = Package( ], dependencies: [ .package(path: "../Parser"), - .package(path: "../ParserObjC"), .package(path: "../Articles"), .package(path: "../ArticlesDatabase"), .package(path: "../Web"), @@ -35,7 +34,6 @@ let package = Package( name: "Account", dependencies: [ "Parser", - "ParserObjC", "Web", "Articles", "ArticlesDatabase", diff --git a/Modules/Account/Sources/Account/Account.swift b/Modules/Account/Sources/Account/Account.swift index 278fe5cfe..f6343c755 100644 --- a/Modules/Account/Sources/Account/Account.swift +++ b/Modules/Account/Sources/Account/Account.swift @@ -13,7 +13,6 @@ import UIKit import Foundation import Articles import Parser -import ParserObjC import Database import ArticlesDatabase import Web @@ -465,14 +464,14 @@ public enum FetchType { delegate.accountWillBeDeleted(self) } - func addOPMLItems(_ items: [RSOPMLItem]) { + func addOPMLItems(_ items: [OPMLItem]) { for item in items { if let feedSpecifier = item.feedSpecifier { addFeed(newFeed(with: feedSpecifier)) } else { if let title = item.titleFromAttributes, let folder = ensureFolder(with: title) { folder.externalID = item.attributes?["nnw_externalID"] as? 
String - if let children = item.children { + if let children = item.items { for itemChild in children { if let feedSpecifier = itemChild.feedSpecifier { folder.addFeed(newFeed(with: feedSpecifier)) @@ -484,7 +483,7 @@ public enum FetchType { } } - func loadOPMLItems(_ items: [RSOPMLItem]) { + func loadOPMLItems(_ items: [OPMLItem]) { addOPMLItems(OPMLNormalizer.normalize(items)) } @@ -561,7 +560,7 @@ public enum FetchType { return folders?.first(where: { $0.externalID == externalID }) } - func newFeed(with opmlFeedSpecifier: RSOPMLFeedSpecifier) -> Feed { + func newFeed(with opmlFeedSpecifier: OPMLFeedSpecifier) -> Feed { let feedURL = opmlFeedSpecifier.feedURL let metadata = feedMetadata(feedURL: feedURL, feedID: feedURL) let feed = Feed(account: self, url: opmlFeedSpecifier.feedURL, metadata: metadata) diff --git a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift index 3e83b9da9..e65cc424a 100644 --- a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift @@ -12,7 +12,6 @@ import SystemConfiguration import os.log import SyncDatabase import Parser -import ParserObjC import Articles import ArticlesDatabase import Web @@ -176,9 +175,9 @@ enum CloudKitAccountDelegateError: LocalizedError { let opmlData = try Data(contentsOf: opmlFile) let parserData = ParserData(url: opmlFile.absoluteString, data: opmlData) - let opmlDocument = try RSOPMLParser.parseOPML(with: parserData) + let opmlDocument = OPMLParser.document(with: parserData) - guard let opmlItems = opmlDocument.children, let rootExternalID = account.externalID else { + guard let opmlItems = opmlDocument?.items, let rootExternalID = account.externalID else { return } diff --git a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift 
b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift index 01ec34b3c..1695d12c7 100644 --- a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift @@ -9,7 +9,6 @@ import Foundation import os.log import Parser -import ParserObjC import Articles import ArticlesDatabase import Web @@ -78,8 +77,8 @@ final class LocalAccountDelegate: AccountDelegate { let opmlData = try Data(contentsOf: opmlFile) let parserData = ParserData(url: opmlFile.absoluteString, data: opmlData) - let opmlDocument = try RSOPMLParser.parseOPML(with: parserData) - guard let children = opmlDocument.children else { + let opmlDocument = OPMLParser.document(with: parserData) + guard let children = opmlDocument?.items else { return } @@ -268,9 +267,7 @@ private extension LocalAccountDelegate { return } - let parserData = ParserData(url: feed.url, data: data) - - guard let parsedFeed = try? await FeedParser.parse(parserData) else { + guard let parsedFeed = try? await FeedParser.parseAsync(urlString: feed.url, data: data) else { return } diff --git a/Modules/Account/Sources/Account/OPMLFile.swift b/Modules/Account/Sources/Account/OPMLFile.swift index c44b15d69..39221856e 100644 --- a/Modules/Account/Sources/Account/OPMLFile.swift +++ b/Modules/Account/Sources/Account/OPMLFile.swift @@ -9,7 +9,6 @@ import Foundation import os import Parser -import ParserObjC import Core @MainActor final class OPMLFile { @@ -34,7 +33,7 @@ import Core dataFile.markAsDirty() } - func opmlItems() -> [RSOPMLItem]? { + func opmlItems() -> [OPMLItem]? { guard let fileData = opmlFileData() else { return nil } @@ -62,18 +61,10 @@ private extension OPMLFile { return fileData } - func parsedOPMLItems(fileData: Data) -> [RSOPMLItem]? { + func parsedOPMLItems(fileData: Data) -> [OPMLItem]? { let parserData = ParserData(url: fileURL.absoluteString, data: fileData) - var opmlDocument: RSOPMLDocument? 
- - do { - opmlDocument = try RSOPMLParser.parseOPML(with: parserData) - } catch { - logger.error("OPML Import failed for \(self.fileURL): \(error.localizedDescription)") - return nil - } - - return opmlDocument?.children + let opmlDocument = OPMLParser.document(with: parserData) + return opmlDocument?.items } func opmlDocument() -> String { diff --git a/Modules/Account/Sources/Account/OPMLNormalizer.swift b/Modules/Account/Sources/Account/OPMLNormalizer.swift index bfad43687..2d5de573f 100644 --- a/Modules/Account/Sources/Account/OPMLNormalizer.swift +++ b/Modules/Account/Sources/Account/OPMLNormalizer.swift @@ -8,20 +8,19 @@ import Foundation import Parser -import ParserObjC final class OPMLNormalizer { - var normalizedOPMLItems = [RSOPMLItem]() + var normalizedOPMLItems = [OPMLItem]() - static func normalize(_ items: [RSOPMLItem]) -> [RSOPMLItem] { + static func normalize(_ items: [OPMLItem]) -> [OPMLItem] { let opmlNormalizer = OPMLNormalizer() opmlNormalizer.normalize(items) return opmlNormalizer.normalizedOPMLItems } - private func normalize(_ items: [RSOPMLItem], parentFolder: RSOPMLItem? = nil) { - var feedsToAdd = [RSOPMLItem]() + private func normalize(_ items: [OPMLItem], parentFolder: OPMLItem? = nil) { + var feedsToAdd = [OPMLItem]() for item in items { @@ -34,14 +33,14 @@ final class OPMLNormalizer { guard let _ = item.titleFromAttributes else { // Folder doesn’t have a name, so it won’t be created, and its items will go one level up. 
- if let itemChildren = item.children { + if let itemChildren = item.items { normalize(itemChildren, parentFolder: parentFolder) } continue } feedsToAdd.append(item) - if let itemChildren = item.children { + if let itemChildren = item.items { if let parentFolder = parentFolder { normalize(itemChildren, parentFolder: parentFolder) } else { @@ -52,8 +51,8 @@ final class OPMLNormalizer { if let parentFolder = parentFolder { for feed in feedsToAdd { - if !(parentFolder.children?.contains(where: { $0.feedSpecifier?.feedURL == feed.feedSpecifier?.feedURL}) ?? false) { - parentFolder.addChild(feed) + if !(parentFolder.items?.contains(where: { $0.feedSpecifier?.feedURL == feed.feedSpecifier?.feedURL}) ?? false) { + parentFolder.add(feed) } } } else { diff --git a/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift b/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift index fa78de31c..b96fbf432 100644 --- a/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift +++ b/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift @@ -33,7 +33,7 @@ final class ArticleSearchInfo: Hashable { } lazy var bodyForIndex: String = { - let s = preferredText.rsparser_stringByDecodingHTMLEntities() + let s = HTMLEntityDecoder.decodedString(preferredText) let sanitizedBody = s.strippingHTML().collapsingWhitespace if let authorsNames = authorsNames { diff --git a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift index 8f3b566a5..fea5368d4 100644 --- a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift +++ b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift @@ -10,7 +10,6 @@ import Foundation import os.log import Web import Parser -import ParserObjC import CloudKit import FoundationExtras @@ -57,12 +56,12 @@ enum CloudKitAccountZoneError: LocalizedError { migrateChangeToken() } - public func importOPML(rootExternalID: 
String, items: [RSOPMLItem]) async throws { + public func importOPML(rootExternalID: String, items: [OPMLItem]) async throws { var records = [CKRecord]() var feedRecords = [String: CKRecord]() - func processFeed(feedSpecifier: RSOPMLFeedSpecifier, containerExternalID: String) { + func processFeed(feedSpecifier: OPMLFeedSpecifier, containerExternalID: String) { if let feedRecord = feedRecords[feedSpecifier.feedURL], var containerExternalIDs = feedRecord[CloudKitFeed.Fields.containerExternalIDs] as? [String] { containerExternalIDs.append(containerExternalID) feedRecord[CloudKitFeed.Fields.containerExternalIDs] = containerExternalIDs @@ -80,7 +79,7 @@ enum CloudKitAccountZoneError: LocalizedError { if let title = item.titleFromAttributes { let containerRecord = newContainerCKRecord(name: title) records.append(containerRecord) - item.children?.forEach { itemChild in + item.items?.forEach { itemChild in if let feedSpecifier = itemChild.feedSpecifier { processFeed(feedSpecifier: feedSpecifier, containerExternalID: containerRecord.externalID) } @@ -289,7 +288,7 @@ enum CloudKitAccountZoneError: LocalizedError { private extension CloudKitAccountZone { - func newFeedCKRecord(feedSpecifier: RSOPMLFeedSpecifier, containerExternalID: String) -> CKRecord { + func newFeedCKRecord(feedSpecifier: OPMLFeedSpecifier, containerExternalID: String) -> CKRecord { let record = CKRecord(recordType: CloudKitFeed.recordType, recordID: generateRecordID()) record[CloudKitFeed.Fields.url] = feedSpecifier.feedURL diff --git a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift index e0c98cccd..c5aba8fa4 100644 --- a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift +++ b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift @@ -9,7 +9,6 @@ import Foundation import os.log import Parser -import ParserObjC import Web import CloudKit import Articles diff --git 
a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift index 738f7b465..3d8503c5c 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC import Web import CommonErrors import os.log @@ -56,7 +55,7 @@ public final class FeedFinder { throw AccountError.createErrorNotFound } - if FeedFinder.isFeed(data, url.absoluteString) { + if FeedFinder.isFeed(data) { logger.info("FeedFinder: is feed \(url)") let feedSpecifier = FeedSpecifier(title: nil, urlString: url.absoluteString, source: .UserEntered, orderFound: 1) return Set([feedSpecifier]) @@ -157,7 +156,7 @@ private extension FeedFinder { if let downloadData = try? await DownloadWithCacheManager.shared.download(url) { if let data = downloadData.data, let response = downloadData.response, response.statusIsOK { - if isFeed(data, downloadFeedSpecifier.urlString) { + if isFeed(data) { addFeedSpecifier(downloadFeedSpecifier, feedSpecifiers: &resultFeedSpecifiers) } } @@ -167,8 +166,7 @@ private extension FeedFinder { return Set(resultFeedSpecifiers.values) } - static func isFeed(_ data: Data, _ urlString: String) -> Bool { - let parserData = ParserData(url: urlString, data: data) - return FeedParser.canParse(parserData) + static func isFeed(_ data: Data) -> Bool { + return FeedParser.canParse(data) } } diff --git a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift index f755dbc20..6c51c9b2f 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift @@ -9,7 +9,6 @@ import Foundation import FoundationExtras import Parser -import ParserObjC private let feedURLWordsToMatch = ["feed", "xml", "rss", "atom", "json"] @@ -22,18 +21,20 @@ class HTMLFeedFinder { private var 
feedSpecifiersDictionary = [String: FeedSpecifier]() init(parserData: ParserData) { - let metadata = RSHTMLMetadataParser.htmlMetadata(with: parserData) + let metadata = HTMLMetadataParser.metadata(with: parserData) var orderFound = 0 - for oneFeedLink in metadata.feedLinks { - if let oneURLString = oneFeedLink.urlString?.normalizedURL { - orderFound = orderFound + 1 - let oneFeedSpecifier = FeedSpecifier(title: oneFeedLink.title, urlString: oneURLString, source: .HTMLHead, orderFound: orderFound) - addFeedSpecifier(oneFeedSpecifier) + if let feedLinks = metadata.feedLinks { + for oneFeedLink in feedLinks { + if let oneURLString = oneFeedLink.urlString?.normalizedURL { + orderFound = orderFound + 1 + let oneFeedSpecifier = FeedSpecifier(title: oneFeedLink.title, urlString: oneURLString, source: .HTMLHead, orderFound: orderFound) + addFeedSpecifier(oneFeedSpecifier) + } } } - let bodyLinks = RSHTMLLinkParser.htmlLinks(with: parserData) + let bodyLinks = HTMLLinkParser.htmlLinks(with: parserData) for oneBodyLink in bodyLinks { if linkMightBeFeed(oneBodyLink), let normalizedURL = oneBodyLink.urlString?.normalizedURL { orderFound = orderFound + 1 @@ -71,7 +72,7 @@ private extension HTMLFeedFinder { return false } - func linkMightBeFeed(_ link: RSHTMLLink) -> Bool { + func linkMightBeFeed(_ link: HTMLLink) -> Bool { if let linkURLString = link.urlString, urlStringMightBeFeed(linkURLString) { return true } diff --git a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift index 809ea799c..a7675c585 100644 --- a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift +++ b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC public final class FeedbinEntry: Decodable, @unchecked Sendable { @@ -29,7 +28,7 @@ public final class FeedbinEntry: Decodable, @unchecked Sendable { // and letting the one date fail when parsed. public lazy var parsedDatePublished: Date? 
= { if let datePublished = datePublished { - return RSDateWithString(datePublished) + return DateParser.date(string: datePublished) } else { return nil diff --git a/Modules/FoundationExtras/Sources/FoundationExtras/Dictionary+Extensions.swift b/Modules/FoundationExtras/Sources/FoundationExtras/Dictionary+Extensions.swift new file mode 100644 index 000000000..ec4363c0f --- /dev/null +++ b/Modules/FoundationExtras/Sources/FoundationExtras/Dictionary+Extensions.swift @@ -0,0 +1,10 @@ +// +// Dictionary+Extensions.swift +// +// +// Created by Brent Simmons on 9/23/24. +// + +import Foundation + +public typealias StringDictionary = [String: String] diff --git a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift index 31670d7df..64ebf4b31 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift @@ -12,7 +12,7 @@ import Articles import Account import UniformTypeIdentifiers import Core -import ParserObjC +import Parser public extension Notification.Name { static let FaviconDidBecomeAvailable = Notification.Name("FaviconDidBecomeAvailableNotification") // userInfo key: FaviconDownloader.UserInfoKey.faviconURL @@ -22,7 +22,7 @@ public protocol FaviconDownloaderDelegate { @MainActor var appIconImage: IconImage? { get } - @MainActor func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata? + @MainActor func downloadMetadata(_ url: String) async throws -> HTMLMetadata? 
} @MainActor public final class FaviconDownloader { diff --git a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift index b3e99fa13..4c9689b18 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift @@ -9,7 +9,6 @@ import Foundation import CoreServices import Parser -import ParserObjC import UniformTypeIdentifiers // The favicon URLs may be specified in the head section of the home page. @@ -23,7 +22,7 @@ import UniformTypeIdentifiers /// - Parameters: /// - homePageURL: The page to search. /// - urls: An array of favicon URLs as strings. - static func findFaviconURLs(with homePageURL: String, downloadMetadata: ((String) async throws -> RSHTMLMetadata?)) async -> [String]? { + static func findFaviconURLs(with homePageURL: String, downloadMetadata: ((String) async throws -> HTMLMetadata?)) async -> [String]? { guard let _ = URL(string: homePageURL) else { return nil @@ -32,14 +31,14 @@ import UniformTypeIdentifiers // If the favicon has an explicit type, check that for an ignored type; otherwise, check the file extension. let htmlMetadata = try? await downloadMetadata(homePageURL) - let faviconURLs = htmlMetadata?.favicons.compactMap { favicon -> String? in + let faviconURLs = htmlMetadata?.favicons?.compactMap { favicon -> String? in shouldAllowFavicon(favicon) ? favicon.urlString : nil } return faviconURLs } - static func shouldAllowFavicon(_ favicon: RSHTMLMetadataFavicon) -> Bool { + static func shouldAllowFavicon(_ favicon: HTMLMetadataFavicon) -> Bool { // Check mime type. 
if let mimeType = favicon.type, let utType = UTType(mimeType: mimeType) { diff --git a/Modules/Images/Sources/Images/FeaturedImageDownloader.swift b/Modules/Images/Sources/Images/FeaturedImageDownloader.swift index 1746cde36..992c6902e 100644 --- a/Modules/Images/Sources/Images/FeaturedImageDownloader.swift +++ b/Modules/Images/Sources/Images/FeaturedImageDownloader.swift @@ -87,7 +87,7 @@ // } // } // -// func pullFeaturedImageURL(from metadata: RSHTMLMetadata, articleURL: String) { +// func pullFeaturedImageURL(from metadata: HTMLMetadata, articleURL: String) { // // if let url = metadata.bestFeaturedImageURL() { // cacheURL(for: articleURL, url) diff --git a/Modules/Images/Sources/Images/FeedIconDownloader.swift b/Modules/Images/Sources/Images/FeedIconDownloader.swift index 35066ff42..c833531ae 100644 --- a/Modules/Images/Sources/Images/FeedIconDownloader.swift +++ b/Modules/Images/Sources/Images/FeedIconDownloader.swift @@ -11,7 +11,6 @@ import Articles import Account import Web import Parser -import ParserObjC import Core public extension Notification.Name { @@ -23,7 +22,7 @@ public protocol FeedIconDownloaderDelegate: Sendable { @MainActor var appIconImage: IconImage? { get } - func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata? + func downloadMetadata(_ url: String) async throws -> HTMLMetadata? } @MainActor public final class FeedIconDownloader { @@ -218,7 +217,7 @@ private extension FeedIconDownloader { homePageToIconURLCacheDirty = true } - func findIconURLForHomePageURL(_ homePageURL: String, feed: Feed, downloadMetadata: @escaping (String) async throws -> RSHTMLMetadata?) { + func findIconURLForHomePageURL(_ homePageURL: String, feed: Feed, downloadMetadata: @escaping (String) async throws -> HTMLMetadata?) 
{ guard !urlsInProgress.contains(homePageURL) else { return @@ -237,7 +236,7 @@ private extension FeedIconDownloader { } } - func pullIconURL(from metadata: RSHTMLMetadata, homePageURL: String, feed: Feed) { + func pullIconURL(from metadata: HTMLMetadata, homePageURL: String, feed: Feed) { if let url = metadata.bestWebsiteIconURL() { cacheIconURL(for: homePageURL, url) diff --git a/Modules/Images/Sources/Images/HTMLMetadata+Extension.swift b/Modules/Images/Sources/Images/HTMLMetadata+Extension.swift new file mode 100644 index 000000000..188791b61 --- /dev/null +++ b/Modules/Images/Sources/Images/HTMLMetadata+Extension.swift @@ -0,0 +1,67 @@ +// +// HTMLMetadata+Extension.swift +// NetNewsWire +// +// Created by Brent Simmons on 11/26/17. +// Copyright © 2017 Ranchero Software. All rights reserved. +// + +import Foundation +import Parser + +extension HTMLMetadata { + + func largestAppleTouchIcon() -> String? { + + guard let icons = appleTouchIcons, !icons.isEmpty else { + return nil + } + + var bestImage: HTMLMetadataAppleTouchIcon? = nil + + for image in icons { + + guard let imageSize = image.size else { + continue + } + if imageSize.width / imageSize.height > 2 { + continue + } + + guard let currentBestImage = bestImage, let bestImageSize = currentBestImage.size else { + bestImage = image + continue + } + + if imageSize.height > bestImageSize.height && imageSize.width > bestImageSize.width { + bestImage = image + } + } + + return bestImage?.urlString ?? icons.first?.urlString + } + + func bestWebsiteIconURL() -> String? { + + // TODO: metadata icons — sometimes they’re large enough to use here. + + if let appleTouchIcon = largestAppleTouchIcon() { + return appleTouchIcon + } + + if let openGraphImageURL = openGraphProperties?.image { + return openGraphImageURL.url + } + + return twitterProperties?.imageURL + } + + func bestFeaturedImageURL() -> String? 
{ + + if let openGraphImageURL = openGraphProperties?.image { + return openGraphImageURL.url + } + + return twitterProperties?.imageURL + } +} diff --git a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift b/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift deleted file mode 100644 index d421d7f33..000000000 --- a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift +++ /dev/null @@ -1,99 +0,0 @@ -// -// RSHTMLMetadata+Extension.swift -// NetNewsWire -// -// Created by Brent Simmons on 11/26/17. -// Copyright © 2017 Ranchero Software. All rights reserved. -// - -import Foundation -import Parser -import ParserObjC - -extension RSHTMLMetadata { - - func largestOpenGraphImageURL() -> String? { - let openGraphImages = openGraphProperties.images - - guard !openGraphImages.isEmpty else { - return nil - } - - var bestImage: RSHTMLOpenGraphImage? = nil - - for image in openGraphImages { - if image.width / image.height > 2 { - continue - } - if bestImage == nil { - bestImage = image - continue - } - if image.height > bestImage!.height && image.width > bestImage!.width { - bestImage = image - } - } - - guard let url = bestImage?.secureURL ?? bestImage?.url else { - return nil - } - - // Bad ones we should ignore. - let badURLs = Set(["https://s0.wp.com/i/blank.jpg"]) - guard !badURLs.contains(url) else { - return nil - } - - return url - } - - func largestAppleTouchIcon() -> String? { - - let icons = appleTouchIcons - - guard !icons.isEmpty else { - return nil - } - - var bestImage: RSHTMLMetadataAppleTouchIcon? = nil - - for image in icons { - if image.size.width / image.size.height > 2 { - continue - } - if bestImage == nil { - bestImage = image - continue - } - if image.size.height > bestImage!.size.height && image.size.width > bestImage!.size.width { - bestImage = image; - } - } - - return bestImage?.urlString - } - - func bestWebsiteIconURL() -> String? { - - // TODO: metadata icons — sometimes they’re large enough to use here. 
- - if let appleTouchIcon = largestAppleTouchIcon() { - return appleTouchIcon - } - - if let openGraphImageURL = largestOpenGraphImageURL() { - return openGraphImageURL - } - - return twitterProperties.imageURL - } - - func bestFeaturedImageURL() -> String? { - - if let openGraphImageURL = largestOpenGraphImageURL() { - return openGraphImageURL - } - - return twitterProperties.imageURL - } -} diff --git a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift index e3a408108..a384fec4d 100644 --- a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift +++ b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC import Web public struct InitialFeedDownloader { @@ -23,8 +22,7 @@ public struct InitialFeedDownloader { return nil } - let parserData = ParserData(url: url.absoluteString, data: data) - guard let parsedFeed = try? await FeedParser.parse(parserData) else { + guard let parsedFeed = try? 
FeedParser.parse(urlString: url.absoluteString, data: data) else { return nil } diff --git a/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme b/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme index a99b3d79f..b8cfb4d23 100644 --- a/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme +++ b/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme @@ -29,6 +29,18 @@ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" shouldUseLaunchSchemeArgsEnv = "YES" shouldAutocreateTestPlan = "YES"> + + + + + + + + + + classNames + + DateParserTests + + testPubDateParsingPerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000131 + baselineIntegrationDisplayName + Local Baseline + + + testW3CParsingPerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000121 + baselineIntegrationDisplayName + Local Baseline + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/Info.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/Info.plist new file mode 100644 index 000000000..132668fff --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/Info.plist @@ -0,0 +1,33 @@ + + + + + runDestinationsByUUID + + 78BB49A7-AEB4-40A1-83DA-EB9C5755E396 + + localComputer + + busSpeedInMHz + 0 + cpuCount + 1 + cpuKind + Apple M1 Max + cpuSpeedInMHz + 0 + logicalCPUCoresPerPackage + 10 + modelCode + Mac13,1 + physicalCPUCoresPerPackage + 10 + platformIdentifier + com.apple.platform.macosx + + targetArchitecture + arm64e + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/9A7464E0-C633-49A0-871F-1F5206C35DE8.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/9A7464E0-C633-49A0-871F-1F5206C35DE8.plist new file mode 
100644 index 000000000..66ab5b65e --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/9A7464E0-C633-49A0-871F-1F5206C35DE8.plist @@ -0,0 +1,52 @@ + + + + + classNames + + FeedParserTypeTests + + testFeedTypePerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000010 + baselineIntegrationDisplayName + Local Baseline + + + testFeedTypePerformance2() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000010 + baselineIntegrationDisplayName + Local Baseline + + + testFeedTypePerformance3() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000499 + baselineIntegrationDisplayName + Local Baseline + + + testFeedTypePerformance4() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000691 + baselineIntegrationDisplayName + Local Baseline + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/Info.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/Info.plist new file mode 100644 index 000000000..fee112bfa --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/Info.plist @@ -0,0 +1,33 @@ + + + + + runDestinationsByUUID + + 9A7464E0-C633-49A0-871F-1F5206C35DE8 + + localComputer + + busSpeedInMHz + 0 + cpuCount + 1 + cpuKind + Apple M1 Max + cpuSpeedInMHz + 0 + logicalCPUCoresPerPackage + 10 + modelCode + Mac13,1 + physicalCPUCoresPerPackage + 10 + platformIdentifier + com.apple.platform.macosx + + targetArchitecture + arm64e + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist new file mode 100644 index 000000000..c392eb3bf --- /dev/null +++ 
b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist @@ -0,0 +1,22 @@ + + + + + classNames + + OPMLTests + + testOPMLParsingPerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.002870 + baselineIntegrationDisplayName + Local Baseline + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/Info.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/Info.plist new file mode 100644 index 000000000..c9ba9f70a --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/Info.plist @@ -0,0 +1,33 @@ + + + + + runDestinationsByUUID + + 8F8BFCF6-AACD-45D7-B626-1B58CDE0924D + + localComputer + + busSpeedInMHz + 0 + cpuCount + 1 + cpuKind + Apple M1 Max + cpuSpeedInMHz + 0 + logicalCPUCoresPerPackage + 10 + modelCode + Mac13,1 + physicalCPUCoresPerPackage + 10 + platformIdentifier + com.apple.platform.macosx + + targetArchitecture + arm64e + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme new file mode 100644 index 000000000..f37cf2377 --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme deleted file mode 100644 index ed73f5e1b..000000000 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index 
3d831e4d8..e9aaf44ff 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -11,25 +11,28 @@ let package = Package( .library( name: "Parser", type: .dynamic, - targets: ["Parser"]), + targets: ["Parser"]) ], dependencies: [ - .package(path: "../ParserObjC"), + .package(path: "../FoundationExtras"), ], targets: [ // Targets are the basic building blocks of a package. A target can define a module or a test suite. // Targets can depend on other targets in this package, and on products in packages this package depends on. .target( name: "Parser", - dependencies: ["ParserObjC"], + dependencies: [ + "FoundationExtras", + ], swiftSettings: [ .enableExperimentalFeature("StrictConcurrency") ]), .testTarget( name: "ParserTests", - dependencies: ["Parser", "ParserObjC"], + dependencies: [ + "Parser" + ], exclude: ["Info.plist"], resources: [.copy("Resources")]), ] ) - diff --git a/Modules/Parser/Sources/Parser/DateParser/DateParser.swift b/Modules/Parser/Sources/Parser/DateParser/DateParser.swift new file mode 100644 index 000000000..f2d5d0ac4 --- /dev/null +++ b/Modules/Parser/Sources/Parser/DateParser/DateParser.swift @@ -0,0 +1,587 @@ +// +// DateParser.swift +// +// +// Created by Brent Simmons on 8/28/24. +// + +import Foundation + +public final class DateParser { + + // MARK: - Public API + + /// Parse W3C and pubDate dates — used for feed parsing. + /// This is a fast alternative to system APIs + /// for parsing dates. + public static func date(data: Data) -> Date? { + + let numberOfBytes = data.count + + // Make sure it’s in reasonable range for a date string. 
+ if numberOfBytes < 6 || numberOfBytes > 150 { + return nil + } + + return data.withUnsafeBytes { bytes in + let buffer = bytes.bindMemory(to: UInt8.self) + + if dateIsW3CDate(buffer, numberOfBytes) { + return parseW3CDate(buffer, numberOfBytes) + } + else if dateIsPubDate(buffer, numberOfBytes) { + return parsePubDate(buffer, numberOfBytes) + } + + // Fallback, in case our detection fails. + return parseW3CDate(buffer, numberOfBytes) + } + } + + public static func date(string: String) -> Date? { + + guard let data = string.data(using: .utf8) else { + return nil + } + return date(data: data) + } + + private typealias DateBuffer = UnsafeBufferPointer + + // See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list + private static let timeZoneTable: [String: Int] = [ + + "GMT": timeZoneOffset(0, 0), + "UTC": timeZoneOffset(0, 0), + "PDT": timeZoneOffset(-7, 0), + "PST": timeZoneOffset(-8, 0), + "EST": timeZoneOffset(-5, 0), + "EDT": timeZoneOffset(-4, 0), + "MDT": timeZoneOffset(-6, 0), + "MST": timeZoneOffset(-7, 0), + "CST": timeZoneOffset(-6, 0), + "CDT": timeZoneOffset(-5, 0), + "ACT": timeZoneOffset(-8, 0), + "AFT": timeZoneOffset(4, 30), + "AMT": timeZoneOffset(4, 0), + "ART": timeZoneOffset(-3, 0), + "AST": timeZoneOffset(3, 0), + "AZT": timeZoneOffset(4, 0), + "BIT": timeZoneOffset(-12, 0), + "BDT": timeZoneOffset(8, 0), + "ACST": timeZoneOffset(9, 30), + "AEST": timeZoneOffset(10, 0), + "AKST": timeZoneOffset(-9, 0), + "AMST": timeZoneOffset(5, 0), + "AWST": timeZoneOffset(8, 0), + "AZOST": timeZoneOffset(-1, 0), + "BIOT": timeZoneOffset(6, 0), + "BRT": timeZoneOffset(-3, 0), + "BST": timeZoneOffset(6, 0), + "BTT": timeZoneOffset(6, 0), + "CAT": timeZoneOffset(2, 0), + "CCT": timeZoneOffset(6, 30), + "CET": timeZoneOffset(1, 0), + "CEST": timeZoneOffset(2, 0), + "CHAST": timeZoneOffset(12, 45), + "ChST": timeZoneOffset(10, 0), + "CIST": timeZoneOffset(-8, 0), + "CKT": timeZoneOffset(-10, 0), + "CLT": timeZoneOffset(-4, 0), + "CLST": 
timeZoneOffset(-3, 0), + "COT": timeZoneOffset(-5, 0), + "COST": timeZoneOffset(-4, 0), + "CVT": timeZoneOffset(-1, 0), + "CXT": timeZoneOffset(7, 0), + "EAST": timeZoneOffset(-6, 0), + "EAT": timeZoneOffset(3, 0), + "ECT": timeZoneOffset(-4, 0), + "EEST": timeZoneOffset(3, 0), + "EET": timeZoneOffset(2, 0), + "FJT": timeZoneOffset(12, 0), + "FKST": timeZoneOffset(-4, 0), + "GALT": timeZoneOffset(-6, 0), + "GET": timeZoneOffset(4, 0), + "GFT": timeZoneOffset(-3, 0), + "GILT": timeZoneOffset(7, 0), + "GIT": timeZoneOffset(-9, 0), + "GST": timeZoneOffset(-2, 0), + "GYT": timeZoneOffset(-4, 0), + "HAST": timeZoneOffset(-10, 0), + "HKT": timeZoneOffset(8, 0), + "HMT": timeZoneOffset(5, 0), + "IRKT": timeZoneOffset(8, 0), + "IRST": timeZoneOffset(3, 30), + "IST": timeZoneOffset(2, 0), + "JST": timeZoneOffset(9, 0), + "KRAT": timeZoneOffset(7, 0), + "KST": timeZoneOffset(9, 0), + "LHST": timeZoneOffset(10, 30), + "LINT": timeZoneOffset(14, 0), + "MAGT": timeZoneOffset(11, 0), + "MIT": timeZoneOffset(-9, 30), + "MSK": timeZoneOffset(3, 0), + "MUT": timeZoneOffset(4, 0), + "NDT": timeZoneOffset(-2, 30), + "NFT": timeZoneOffset(11, 30), + "NPT": timeZoneOffset(5, 45), + "NT": timeZoneOffset(-3, 30), + "OMST": timeZoneOffset(6, 0), + "PETT": timeZoneOffset(12, 0), + "PHOT": timeZoneOffset(13, 0), + "PKT": timeZoneOffset(5, 0), + "RET": timeZoneOffset(4, 0), + "SAMT": timeZoneOffset(4, 0), + "SAST": timeZoneOffset(2, 0), + "SBT": timeZoneOffset(11, 0), + "SCT": timeZoneOffset(4, 0), + "SLT": timeZoneOffset(5, 30), + "SST": timeZoneOffset(8, 0), + "TAHT": timeZoneOffset(-10, 0), + "THA": timeZoneOffset(7, 0), + "UYT": timeZoneOffset(-3, 0), + "UYST": timeZoneOffset(-2, 0), + "VET": timeZoneOffset(-4, 30), + "VLAT": timeZoneOffset(10, 0), + "WAT": timeZoneOffset(1, 0), + "WET": timeZoneOffset(0, 0), + "WEST": timeZoneOffset(1, 0), + "YAKT": timeZoneOffset(9, 0), + "YEKT": timeZoneOffset(5, 0) + ] +} + +// MARK: - Private + +private extension DateParser { + + struct 
DateCharacter { + + static let space = Character(" ").asciiValue! + static let `return` = Character("\r").asciiValue! + static let newline = Character("\n").asciiValue! + static let tab = Character("\t").asciiValue! + static let hyphen = Character("-").asciiValue! + static let comma = Character(",").asciiValue! + static let dot = Character(".").asciiValue! + static let colon = Character(":").asciiValue! + static let plus = Character("+").asciiValue! + static let minus = Character("-").asciiValue! + static let A = Character("A").asciiValue! + static let a = Character("a").asciiValue! + static let D = Character("D").asciiValue! + static let d = Character("d").asciiValue! + static let F = Character("F").asciiValue! + static let f = Character("f").asciiValue! + static let J = Character("J").asciiValue! + static let j = Character("j").asciiValue! + static let M = Character("M").asciiValue! + static let m = Character("m").asciiValue! + static let N = Character("N").asciiValue! + static let n = Character("n").asciiValue! + static let O = Character("O").asciiValue! + static let o = Character("o").asciiValue! + static let S = Character("S").asciiValue! + static let s = Character("s").asciiValue! + static let U = Character("U").asciiValue! + static let u = Character("u").asciiValue! + static let Y = Character("Y").asciiValue! + static let y = Character("y").asciiValue! + static let Z = Character("Z").asciiValue! + static let z = Character("z").asciiValue! + } + + enum Month: Int { + + case January = 1, + February, + March, + April, + May, + June, + July, + August, + September, + October, + November, + December + } + + // MARK: - Standard Formats + + private static func dateIsW3CDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Bool { + + // Something like 2010-11-17T08:40:07-05:00 + // But might be missing T character in the middle. + // Looks for four digits in a row followed by a -. + + for i in 0.. 
Bool { + + for ch in bytes { + if ch == DateCharacter.space || ch == DateCharacter.comma { + return true + } + } + + return false + } + + private static func parseW3CDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Date? { + + /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss" + @"yyyy-MM-dd'T'HH:mm:sszzz" + @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz" + etc.*/ + + var finalIndex = 0 + + guard let year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex) else { + return nil + } + guard let month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) else { + return nil + } + guard let day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) else { + return nil + } + let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + let second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + + var currentIndex = finalIndex + 1 + + let milliseconds = { + var ms = 0 + let hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.dot) + if hasMilliseconds { + ms = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex) ?? 00 + currentIndex = finalIndex + 1 + } + + // Ignore more than 3 digits of precision + while currentIndex < numberOfBytes && isDigit(bytes[currentIndex]) { + currentIndex += 1 + } + + return ms + }() + + let timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex) + + return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset) + } + + private static func parsePubDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Date? { + + var finalIndex = 0 + + let day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex) ?? 1 + let month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex) ?? 
.January + + guard let year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex) else { + return nil + } + + let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + + var currentIndex = finalIndex + 1 + + let second = { + var s = 0 + let hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.colon) + if hasSeconds { + s = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex) ?? 0 + } + return s + }() + + currentIndex = finalIndex + 1 + + let timeZoneOffset = { + var offset = 0 + let hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.space) + if hasTimeZone { + offset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex) + } + return offset + }() + + return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month.rawValue, day, hour, minute, second, 0, timeZoneOffset) + } + + // MARK: - Date Creation + + static func dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int, _ timeZoneOffset: Int) -> Date? { + + var timeInfo = tm() + timeInfo.tm_sec = CInt(second) + timeInfo.tm_min = CInt(minute) + timeInfo.tm_hour = CInt(hour) + timeInfo.tm_mday = CInt(day) + timeInfo.tm_mon = CInt(month - 1) //It's 1-based coming in + timeInfo.tm_year = CInt(year - 1900) //see time.h -- it's years since 1900 + timeInfo.tm_wday = -1 + timeInfo.tm_yday = -1 + timeInfo.tm_isdst = -1 + timeInfo.tm_gmtoff = 0; + timeInfo.tm_zone = nil; + + let rawTime = timegm(&timeInfo) - timeZoneOffset + if rawTime == time_t(UInt32.max) { + + // NSCalendar is super-amazingly slow (which is partly why this parser exists), + // so this is used only when the date is far enough in the future + // (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. 
+ // Hopefully by the time we consistently need dates that far in the future + // the performance of NSCalendar won’t be an issue. + + var dateComponents = DateComponents() + + dateComponents.timeZone = TimeZone(secondsFromGMT: timeZoneOffset) + dateComponents.year = year + dateComponents.month = month + dateComponents.day = day + dateComponents.hour = hour + dateComponents.minute = minute + dateComponents.second = second + dateComponents.nanosecond = milliseconds * 1000000 + + return Calendar.autoupdatingCurrent.date(from: dateComponents) + } + + var timeInterval = TimeInterval(rawTime) + if milliseconds > 0 { + timeInterval += TimeInterval(TimeInterval(milliseconds) / 1000.0) + } + + return Date(timeIntervalSince1970: timeInterval) + } + + // MARK: - Time Zones and Offsets + + private static func parsedTimeZoneOffset(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int) -> Int { + + var timeZoneCharacters: [UInt8] = [0, 0, 0, 0, 0, 0] // nil-terminated last character + var numberOfCharactersFound = 0 + var hasAtLeastOneAlphaCharacter = false + + for i in startingIndex..= 5 { + break + } + } + + if numberOfCharactersFound < 1 || timeZoneCharacters[0] == DateCharacter.Z || timeZoneCharacters[0] == DateCharacter.z { + return 0 + } + + if hasAtLeastOneAlphaCharacter { + return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters) ?? 0 + } + return offsetInSecondsForOffsetCharacters(timeZoneCharacters) + } + + private static func offsetInSecondsForOffsetCharacters(_ timeZoneCharacters: [UInt8]) -> Int { + + let isPlus = timeZoneCharacters[0] == DateCharacter.plus + var finalIndex = 0 + let numberOfCharacters = strlen(timeZoneCharacters) + + return timeZoneCharacters.withUnsafeBufferPointer { bytes in + let hours = nextNumericValue(bytes, numberOfCharacters, 0, 2, &finalIndex) ?? 0 + let minutes = nextNumericValue(bytes, numberOfCharacters, finalIndex + 1, 2, &finalIndex) ?? 
0 + + if hours == 0 && minutes == 0 { + return 0 + } + + var seconds = (hours * 60 * 60) + (minutes * 60) + if !isPlus { + seconds = 0 - seconds + } + + return seconds + } + } + + /// Returns offset in seconds. + static func timeZoneOffset(_ hours: Int, _ minutes: Int) -> Int { + + if hours < 0 { + return (hours * 60 * 60) - (minutes * 60) + } + return (hours * 60 * 60) + (minutes * 60) + } + + private static func offsetInSecondsForTimeZoneAbbreviation(_ abbreviation: [UInt8]) -> Int? { + + let name = String(cString: abbreviation) + return timeZoneTable[name] + } + + // MARK: - Parser + + private static func nextMonthValue(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ finalIndex: inout Int) -> DateParser.Month? { + + // Lots of short-circuits here. Not strict. + + var numberOfAlphaCharactersFound = 0 + var monthCharacters: [CChar] = [0, 0, 0] + + for i in startingIndex.. 0 { + break + } + } + + numberOfAlphaCharactersFound+=1 + if numberOfAlphaCharactersFound == 1 { + if ch == DateCharacter.F || ch == DateCharacter.f { + return .February + } + if ch == DateCharacter.S || ch == DateCharacter.s { + return .September + } + if ch == DateCharacter.O || ch == DateCharacter.o { + return .October + } + if ch == DateCharacter.N || ch == DateCharacter.n { + return .November + } + if ch == DateCharacter.D || ch == DateCharacter.d { + return .December + } + } + + monthCharacters[numberOfAlphaCharactersFound - 1] = CChar(ch) + if numberOfAlphaCharactersFound >= 3 { + break + } + } + + if numberOfAlphaCharactersFound < 2 { + return nil + } + + if monthCharacters[0] == DateCharacter.J || monthCharacters[0] == DateCharacter.j { // Jan, Jun, Jul + if monthCharacters[1] == DateCharacter.A || monthCharacters[1] == DateCharacter.a { + return .January + } + if monthCharacters[1] == DateCharacter.U || monthCharacters[1] == DateCharacter.u { + if monthCharacters[2] == DateCharacter.N || monthCharacters[2] == DateCharacter.n { + return .June + } + return .July + } + 
return .January + } + + if monthCharacters[0] == DateCharacter.M || monthCharacters[0] == DateCharacter.m { // March, May + if monthCharacters[2] == DateCharacter.Y || monthCharacters[2] == DateCharacter.y { + return .May + } + return .March + } + + if monthCharacters[0] == DateCharacter.A || monthCharacters[0] == DateCharacter.a { // April, August + if monthCharacters[1] == DateCharacter.U || monthCharacters[1] == DateCharacter.u { + return .August + } + return .April + } + + return .January // Should never get here (but possibly do) + } + + private static func nextNumericValue(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ maximumNumberOfDigits: Int, _ finalIndex: inout Int) -> Int? { + + // Maximum for the maximum is 4 (for time zone offsets and years) + assert(maximumNumberOfDigits > 0 && maximumNumberOfDigits <= 4) + + var numberOfDigitsFound = 0 + var digits = [0, 0, 0, 0] + + for i in startingIndex.. 0 { + break + } + + digits[numberOfDigitsFound] = ch - 48; // '0' is 48 + numberOfDigitsFound+=1 + if numberOfDigitsFound >= maximumNumberOfDigits { + break + } + } + + if numberOfDigitsFound < 1 { + return nil + } + + if numberOfDigitsFound == 1 { + return digits[0] + } + if numberOfDigitsFound == 2 { + return (digits[0] * 10) + digits[1] + } + if numberOfDigitsFound == 3 { + return (digits[0] * 100) + (digits[1] * 10) + digits[2] + } + return (digits[0] * 1000) + (digits[1] * 100) + (digits[2] * 10) + digits[3] + } + + static func isDigit(_ ch: T) -> Bool { + + return isdigit(Int32(ch)) != 0 + } + + static func isAlpha(_ ch: T) -> Bool { + + return isalpha(Int32(ch)) != 0 + } +} diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift new file mode 100644 index 000000000..ea91797bb --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift @@ -0,0 +1,57 @@ +// +// FeedParser.swift +// RSParser +// +// Created by Brent Simmons on 
6/20/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation + +// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. +// You don’t need to know the type of feed. + +public struct FeedParser { + + public static func canParse(_ data: Data) -> Bool { + + let type = FeedType.feedType(data) + + switch type { + case .jsonFeed, .rssInJSON, .rss, .atom: + return true + default: + return false + } + } + + public static func parse(urlString: String, data: Data) throws -> ParsedFeed? { + + let type = FeedType.feedType(data) + + switch type { + + case .jsonFeed: + return try JSONFeedParser.parse(urlString: urlString, data: data) + + case .rssInJSON: + return try RSSInJSONParser.parse(urlString: urlString, data: data) + + case .rss: + let feed = RSSParser.parsedFeed(urlString: urlString, data: data) + return RSSFeedTransformer.parsedFeed(with: feed, feedType: .rss) + + case .atom: + let feed = AtomParser.parsedFeed(urlString: urlString, data: data) + return RSSFeedTransformer.parsedFeed(with: feed, feedType: .atom) + + case .unknown, .notAFeed: + return nil + } + } + + public static func parseAsync(urlString: String, data: Data) async throws -> ParsedFeed? { + + try parse(urlString: urlString, data: data) + } +} diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedParserError.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParserError.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/FeedParserError.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParserError.swift diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift new file mode 100644 index 000000000..21d70539d --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift @@ -0,0 +1,148 @@ +// +// FeedType.swift +// RSParser +// +// Created by Brent Simmons on 6/20/17. +// Copyright © 2017 Ranchero Software, LLC. 
All rights reserved. +// + +import Foundation + +public enum FeedType: Sendable { + + case rss + case atom + case jsonFeed + case rssInJSON + case unknown + case notAFeed + + private static let minNumberOfBytesRequired = 128 + + static func feedType(_ data: Data, isPartialData: Bool = false) -> FeedType { + + // Can call with partial data — while still downloading, for instance. + // If there’s not enough data, return .unknown. Ask again when there’s more data. + // If it’s definitely not a feed, return .notAFeed. + + let count = data.count + if count < minNumberOfBytesRequired { + return .unknown + } + + return data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in + + guard let baseAddress = pointer.baseAddress else { + return .unknown + } + let cCharPointer = baseAddress.assumingMemoryBound(to: CChar.self) + + if isProbablyJSON(cCharPointer, count) { + + if isPartialData { + // Might not be able to detect a JSON Feed without all data. + // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) + // has, at this writing, the JSON version element at the end of the feed, + // which is totally legal — but it means not being able to detect + // that it’s a JSON Feed without all the data. + // So this returns .unknown instead of .notAFeed. 
+ return .unknown + } + + if isProbablyJSONFeed(cCharPointer, count) { + return .jsonFeed + } + if isProbablyRSSInJSON(cCharPointer, count) { + return .rssInJSON + } + } + + if isProbablyRSS(cCharPointer, count) { + return .rss + } + if isProbablyAtom(cCharPointer, count) { + return .atom + } + + return .notAFeed + } + } +} + +private extension FeedType { + + static func isProbablyRSS(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + if didFindString("", bytes, count) && didFindString("", bytes, count) + } + + static func isProbablyAtom(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + didFindString(", _ count: Int) -> Bool { + + bytesStartWithStringIgnoringWhitespace("{", bytes, count) + } + + static func isProbablyJSONFeed(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + // Assumes already called `isProbablyJSON` and it returned true. + didFindString("://jsonfeed.org/version/", bytes, count) || didFindString(":\\/\\/jsonfeed.org\\/version\\/", bytes, count) + } + + static func isProbablyRSSInJSON(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + // Assumes already called `isProbablyJSON` and it returned true. + didFindString("rss", bytes, count) && didFindString("channel", bytes, count) && didFindString("item", bytes, count) + } + + static func didFindString(_ string: UnsafePointer, _ bytes: UnsafePointer, _ numberOfBytes: Int) -> Bool { + + let foundString = strnstr(bytes, string, numberOfBytes) + return foundString != nil + } + + struct Whitespace { + static let space = Character(" ").asciiValue! + static let `return` = Character("\r").asciiValue! + static let newline = Character("\n").asciiValue! + static let tab = Character("\t").asciiValue! 
+ } + + static func bytesStartWithStringIgnoringWhitespace(_ string: UnsafePointer, _ bytes: UnsafePointer, _ numberOfBytes: Int) -> Bool { + + var i = 0 + + while i < numberOfBytes { + + let ch = bytes[i] + + if ch == Whitespace.space || ch == Whitespace.return || ch == Whitespace.newline || ch == Whitespace.tab { + i += 1 + continue + } + + if ch == string[0] { + if let found = strnstr(bytes, string, numberOfBytes) { + return found == bytes + i + } + } + + // Allow for a BOM of up to four bytes (assuming BOM is only at the start) + if i < 4 { + i += 1 + continue + } + + break + } + + return false + } +} diff --git a/Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/JSONFeedParser.swift similarity index 95% rename from Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/JSONFeedParser.swift index 733f0f92e..a8665af2e 100644 --- a/Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/JSONFeedParser.swift @@ -7,9 +7,6 @@ // import Foundation -#if SWIFT_PACKAGE -import ParserObjC -#endif // See https://jsonfeed.org/version/1.1 @@ -52,9 +49,9 @@ public struct JSONFeedParser { static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct. - public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + public static func parse(urlString: String, data: Data) throws -> ParsedFeed? { - guard let d = JSONUtilities.dictionary(with: parserData.data) else { + guard let d = JSONUtilities.dictionary(with: data) else { throw FeedParserError(.invalidJSON) } @@ -70,7 +67,7 @@ public struct JSONFeedParser { let authors = parseAuthors(d) let homePageURL = d[Key.homePageURL] as? String - let feedURL = d[Key.feedURL] as? String ?? parserData.url + let feedURL = d[Key.feedURL] as? String ?? 
urlString let feedDescription = d[Key.feedDescription] as? String let nextURL = d[Key.nextURL] as? String let iconURL = d[Key.icon] as? String @@ -79,7 +76,7 @@ public struct JSONFeedParser { let hubs = parseHubs(d) let language = d[Key.language] as? String - let items = parseItems(itemsArray, parserData.url) + let items = parseItems(itemsArray, urlString) return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items) } @@ -179,7 +176,7 @@ private extension JSONFeedParser { } if isSpecialCaseTitleWithEntitiesFeed(feedURL) { - return (title as NSString).rsparser_stringByDecodingHTMLEntities() + return HTMLEntityDecoder.decodedString(title) } return title @@ -221,7 +218,7 @@ private extension JSONFeedParser { guard let dateString = dateString, !dateString.isEmpty else { return nil } - return RSDateWithString(dateString) + return DateParser.date(string: dateString) } static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { diff --git a/Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/RSSInJSONParser.swift similarity index 94% rename from Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/RSSInJSONParser.swift index ad484f6b9..39f9623a7 100644 --- a/Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/RSSInJSONParser.swift @@ -7,19 +7,16 @@ // import Foundation -#if SWIFT_PACKAGE -import ParserObjC -#endif // See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md // Also: http://cyber.harvard.edu/rss/rss.html public struct RSSInJSONParser { - public static func parse(_ parserData: ParserData) throws -> ParsedFeed? 
{ + public static func parse(urlString: String, data: Data) throws -> ParsedFeed? { do { - guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else { + guard let parsedObject = try JSONSerialization.jsonObject(with: data) as? JSONDictionary else { throw FeedParserError(.invalidJSON) } guard let rssObject = parsedObject["rss"] as? JSONDictionary else { @@ -47,11 +44,11 @@ public struct RSSInJSONParser { let title = channelObject["title"] as? String let homePageURL = channelObject["link"] as? String - let feedURL = parserData.url + let feedURL = urlString let feedDescription = channelObject["description"] as? String let feedLanguage = channelObject["language"] as? String - let items = parseItems(itemsObject!, parserData.url) + let items = parseItems(itemsObject!, urlString) return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) @@ -87,7 +84,7 @@ private extension RSSInJSONParser { var datePublished: Date? = nil if let datePublishedString = itemDictionary["pubDate"] as? String { - datePublished = RSDateWithString(datePublishedString) + datePublished = DateParser.date(string: datePublishedString) } let authors = parseAuthors(itemDictionary) @@ -127,7 +124,7 @@ private extension RSSInJSONParser { s = contentText! 
} } - uniqueID = (s as NSString).rsparser_md5Hash() + uniqueID = s.md5String } if let uniqueID = uniqueID { diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedAttachment.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAttachment.swift similarity index 68% rename from Modules/Parser/Sources/Parser/Feeds/ParsedAttachment.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAttachment.swift index 48b2bd64c..a3c3cb4f5 100644 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedAttachment.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAttachment.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedAttachment: Hashable, Sendable { +public final class ParsedAttachment: Hashable, Sendable { public let url: String public let mimeType: String? @@ -33,4 +33,10 @@ public struct ParsedAttachment: Hashable, Sendable { public func hash(into hasher: inout Hasher) { hasher.combine(url) } + + // MARK: - Equatable + + public static func ==(lhs: ParsedAttachment, rhs: ParsedAttachment) -> Bool { + lhs.url == rhs.url && lhs.mimeType == rhs.mimeType && lhs.title == rhs.title && lhs.sizeInBytes == rhs.sizeInBytes && lhs.durationInSeconds == rhs.durationInSeconds + } } diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAuthor.swift similarity index 50% rename from Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAuthor.swift index 7b7d5165e..260eece16 100644 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAuthor.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedAuthor: Hashable, Codable, Sendable { +public final class ParsedAuthor: Hashable, Codable, Sendable { public let name: String? public let url: String? 
@@ -22,6 +22,18 @@ public struct ParsedAuthor: Hashable, Codable, Sendable { self.emailAddress = emailAddress } + /// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.) + convenience init(singleString: String) { + + if singleString.contains("@") { + self.init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString) + } else if singleString.lowercased().hasPrefix("http") { + self.init(name: nil, url: singleString, avatarURL: nil, emailAddress: nil) + } else { + self.init(name: singleString, url: nil, avatarURL: nil, emailAddress: nil) + } + } + // MARK: - Hashable public func hash(into hasher: inout Hasher) { @@ -41,4 +53,11 @@ public struct ParsedAuthor: Hashable, Codable, Sendable { hasher.combine("") } } + + // MARK: - Equatable + + public static func ==(lhs: ParsedAuthor, rhs: ParsedAuthor) -> Bool { + + lhs.name == rhs.name && lhs.url == rhs.url && lhs.avatarURL == rhs.avatarURL && lhs.emailAddress == rhs.emailAddress + } } diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedFeed.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedFeed.swift similarity index 96% rename from Modules/Parser/Sources/Parser/Feeds/ParsedFeed.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedFeed.swift index 639d86ab4..b19e9b03c 100644 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedFeed.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedFeed.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedFeed: Sendable { +public final class ParsedFeed: Sendable { public let type: FeedType public let title: String? diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedHub.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedHub.swift new file mode 100644 index 000000000..abd687467 --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedHub.swift @@ -0,0 +1,33 @@ +// +// ParsedHub.swift +// RSParser +// +// Created by Brent Simmons on 6/20/17. 
+// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation + +public final class ParsedHub: Hashable, Sendable { + + public let type: String + public let url: String + + init(type: String, url: String) { + self.type = type + self.url = url + } + + // MARK: - Hashable + + public func hash(into hasher: inout Hasher) { + hasher.combine(type) + hasher.combine(url) + } + + // MARK: - Equatable + + public static func ==(lhs: ParsedHub, rhs: ParsedHub) -> Bool { + lhs.type == rhs.type && lhs.url == rhs.url + } +} diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedItem.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedItem.swift similarity index 73% rename from Modules/Parser/Sources/Parser/Feeds/ParsedItem.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedItem.swift index 40c719be1..2c3057835 100644 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedItem.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedItem.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedItem: Hashable, Sendable { +public final class ParsedItem: Hashable, Sendable { public let syncServiceID: String? 
//Nil when not syncing public let uniqueID: String //RSS guid, for instance; may be calculated @@ -63,5 +63,10 @@ public struct ParsedItem: Hashable, Sendable { hasher.combine(feedURL) } } + + public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool { + + lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title && lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments + } } diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/AtomParser.swift new file mode 100644 index 000000000..ef7002390 --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/AtomParser.swift @@ -0,0 +1,444 @@ +// +// AtomParser.swift +// RSParser +// +// Created by Brent Simmons on 6/25/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation +import FoundationExtras + +final class AtomParser { + + private var feedURL: String + private let data: Data + private let feed: RSSFeed + + private var articles = [RSSArticle]() + private var currentArticle: RSSArticle? { + articles.last + } + + private var attributesStack = [StringDictionary]() + private var currentAttributes: StringDictionary? { + attributesStack.last + } + + private var parsingXHTML = false + private var xhtmlString: String? + + private var currentAuthor: RSSAuthor? 
+ private var parsingAuthor = false + + private var parsingArticle = false + private var parsingSource = false + private var endFeedFound = false + + static func parsedFeed(urlString: String, data: Data) -> RSSFeed { + + let parser = AtomParser(urlString: urlString, data: data) + parser.parse() + return parser.feed + } + + init(urlString: String, data: Data) { + self.feedURL = urlString + self.data = data + self.feed = RSSFeed(urlString: urlString) + } +} + +private extension AtomParser { + + func parse() { + + let saxParser = SAXParser(delegate: self, data: data) + saxParser.parse() + feed.articles = articles + } + + private struct XMLName { + static let entry = "entry".utf8CString + static let content = "content".utf8CString + static let summary = "summary".utf8CString + static let link = "link".utf8CString + static let feed = "feed".utf8CString + static let source = "source".utf8CString + static let author = "author".utf8CString + static let name = "name".utf8CString + static let email = "email".utf8CString + static let uri = "uri".utf8CString + static let title = "title".utf8CString + static let id = "id".utf8CString + static let published = "published".utf8CString + static let updated = "updated".utf8CString + static let issued = "issued".utf8CString + static let modified = "modified".utf8CString + } + + private struct XMLString { + static let rel = "rel" + static let alternate = "alternate" + static let related = "related" + static let enclosure = "enclosure" + static let href = "href" + static let title = "title" + static let type = "type" + static let length = "length" + static let xmlLang = "xml:lang" + } + + func currentString(_ saxParser: SAXParser) -> String? { + + saxParser.currentStringWithTrimmedWhitespace + } + + func currentDate(_ saxParser: SAXParser) -> Date? 
{ + + guard let data = saxParser.currentCharacters else { + assertionFailure("Unexpected nil saxParser.currentCharacters in AtomParser.currentDate") + return nil + } + + return DateParser.date(data: data) + } + + func addFeedTitle(_ saxParser: SAXParser) { + + guard feed.title == nil else { + return + } + + if let title = currentString(saxParser), !title.isEmpty { + feed.title = title + } + } + + func addFeedLink() { + + guard feed.link == nil, let currentAttributes else { + return + } + + if let related = currentAttributes[XMLString.rel], related == XMLString.alternate { + feed.link = currentAttributes[XMLString.href] + } + } + + func addFeedLanguage() { + + guard feed.language == nil, let currentAttributes else { + return + } + + feed.language = currentAttributes[XMLString.xmlLang] + } + + func addArticle() { + let article = RSSArticle(feedURL) + articles.append(article) + } + + func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) { + + guard prefix == nil else { + return + } + guard let currentArticle else { + assertionFailure("currentArticle must not be nil in AtomParser.addArticleElement") + return + } + + if SAXEqualTags(localName, XMLName.id) { + currentArticle.guid = currentString(saxParser) + } + + else if SAXEqualTags(localName, XMLName.title) { + currentArticle.title = currentString(saxParser) + } + + else if SAXEqualTags(localName, XMLName.content) { + addContent(saxParser, currentArticle) + } + + else if SAXEqualTags(localName, XMLName.summary) { + addSummary(saxParser, currentArticle) + } + + else if SAXEqualTags(localName, XMLName.link) { + addLink(currentArticle) + } + + else if SAXEqualTags(localName, XMLName.published) { + currentArticle.datePublished = currentDate(saxParser) + } + + else if SAXEqualTags(localName, XMLName.updated) { + currentArticle.dateModified = currentDate(saxParser) + } + + // Atom 0.3 dates + else if SAXEqualTags(localName, XMLName.issued) { + if currentArticle.datePublished == nil { 
+ currentArticle.datePublished = currentDate(saxParser) + } + } + else if SAXEqualTags(localName, XMLName.modified) { + if currentArticle.dateModified == nil { + currentArticle.dateModified = currentDate(saxParser) + } + } + } + + func addContent(_ saxParser: SAXParser, _ article: RSSArticle) { + + article.body = currentString(saxParser) + } + + func addSummary(_ saxParser: SAXParser, _ article: RSSArticle) { + + guard article.body == nil else { + return + } + article.body = currentString(saxParser) + } + + func addLink(_ article: RSSArticle) { + + guard let attributes = currentAttributes else { + return + } + guard let urlString = attributes[XMLString.href], !urlString.isEmpty else { + return + } + + var rel = attributes[XMLString.rel] + if rel?.isEmpty ?? true { + rel = XMLString.alternate + } + + if rel == XMLString.related { + if article.link == nil { + article.link = urlString + } + } + else if rel == XMLString.alternate { + if article.permalink == nil { + article.permalink = urlString + } + } + else if rel == XMLString.enclosure { + if let enclosure = enclosure(urlString, attributes) { + article.addEnclosure(enclosure) + } + } + } + + func enclosure(_ urlString: String, _ attributes: StringDictionary) -> RSSEnclosure? 
{
+
+        let enclosure = RSSEnclosure(url: urlString)
+        enclosure.title = attributes[XMLString.title]
+        enclosure.mimeType = attributes[XMLString.type]
+
+        if let lengthString = attributes[XMLString.length] {
+            enclosure.length = Int(lengthString)
+        }
+
+        return enclosure
+    }
+
+    /// Appends the opening tag for `localName`, including the current
+    /// attributes, to the XHTML string being accumulated.
+    ///
+    /// The tag is built locally and then appended to the stored `xhtmlString`
+    /// property. Binding the property with `guard var` would only mutate a
+    /// local copy and the tag would be lost.
+    func addXHTMLTag(_ localName: XMLPointer) {
+
+        guard xhtmlString != nil else {
+            assertionFailure("xhtmlString must not be nil when in addXHTMLTag.")
+            return
+        }
+
+        guard let name = String(xmlPointer: localName) else {
+            assertionFailure("Unexpected failure converting XMLPointer to String in addXHTMLTag.")
+            return
+        }
+
+        var tag = "<" + name
+
+        if let currentAttributes {
+            for (key, value) in currentAttributes {
+                // Attribute values are wrapped in double quotes, so any quote
+                // inside the value has to be written as an entity.
+                let encodedValue = value.replacingOccurrences(of: "\"", with: "&quot;")
+                tag.append(" ")
+                tag.append(key)
+                tag.append("=\"")
+                tag.append(encodedValue)
+                tag.append("\"")
+            }
+        }
+
+        tag.append(">")
+        xhtmlString?.append(tag)
+    }
+}
+
+extension AtomParser: SAXParserDelegate {
+
+    public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) {
+
+        if endFeedFound {
+            return
+        }
+
+        let xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) ?? 
StringDictionary() + attributesStack.append(xmlAttributes) + + if parsingXHTML { + addXHTMLTag(localName) + return + } + + if SAXEqualTags(localName, XMLName.entry) { + parsingArticle = true + addArticle() + return + } + + if SAXEqualTags(localName, XMLName.author) { + parsingAuthor = true + currentAuthor = RSSAuthor() + return + } + + if SAXEqualTags(localName, XMLName.source) { + parsingSource = true + return + } + + let isContentTag = SAXEqualTags(localName, XMLName.content) + let isSummaryTag = SAXEqualTags(localName, XMLName.summary) + + if parsingArticle && (isContentTag || isSummaryTag) { + + if isContentTag { + currentArticle?.language = xmlAttributes["xml:lang"] + } + + let contentType = xmlAttributes["type"]; + if contentType == "xhtml" { + parsingXHTML = true + xhtmlString = "" + return + } + } + + if !parsingArticle && SAXEqualTags(localName, XMLName.link) { + addFeedLink() + return + } + + if SAXEqualTags(localName, XMLName.feed) { + addFeedLanguage() + } + + saxParser.beginStoringCharacters() + } + + public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + + if SAXEqualTags(localName, XMLName.feed) { + endFeedFound = true + return + } + + if endFeedFound { + return + } + + if parsingXHTML { + + let isContentTag = SAXEqualTags(localName, XMLName.content) + let isSummaryTag = SAXEqualTags(localName, XMLName.summary) + + if parsingArticle && (isContentTag || isSummaryTag) { + + if isContentTag { + currentArticle?.body = xhtmlString + } + + else if isSummaryTag { + if (currentArticle?.body?.count ?? 
0) < 1 {
+                        currentArticle?.body = xhtmlString
+                    }
+                }
+            }
+
+            if isContentTag || isSummaryTag {
+                parsingXHTML = false
+            }
+
+            // Close the element in the accumulated XHTML. Append to the stored
+            // property: binding it with `if var` would only mutate a local copy.
+            if xhtmlString != nil {
+                if let localNameString = String(xmlPointer: localName) {
+                    xhtmlString?.append("</\(localNameString)>")
+                }
+            } else {
+                assertionFailure("xhtmlString must not be nil when parsingXHTML in xmlEndElement.")
+            }
+        }
+
+        else if parsingAuthor {
+
+            if SAXEqualTags(localName, XMLName.author) {
+                parsingAuthor = false
+                if let currentAuthor, !currentAuthor.isEmpty() {
+                    currentArticle?.addAuthor(currentAuthor)
+                }
+                currentAuthor = nil
+            }
+            else if SAXEqualTags(localName, XMLName.name) {
+                currentAuthor?.name = saxParser.currentStringWithTrimmedWhitespace
+            }
+            else if SAXEqualTags(localName, XMLName.email) {
+                currentAuthor?.emailAddress = saxParser.currentStringWithTrimmedWhitespace
+            }
+            else if SAXEqualTags(localName, XMLName.uri) {
+                currentAuthor?.url = saxParser.currentStringWithTrimmedWhitespace
+            }
+        }
+
+        else if SAXEqualTags(localName, XMLName.entry) {
+            parsingArticle = false
+        }
+
+        else if parsingArticle && !parsingSource {
+            addArticleElement(saxParser, localName, prefix)
+        }
+
+        else if SAXEqualTags(localName, XMLName.source) {
+            parsingSource = false
+        }
+
+        else if !parsingArticle && !parsingSource && SAXEqualTags(localName, XMLName.title) {
+            addFeedTitle(saxParser)
+        }
+
+        _ = attributesStack.popLast()
+    }
+
+    public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {
+
+        guard parsingXHTML else {
+            return
+        }
+        guard var s = String(xmlPointer: xmlCharactersFound, count: count) else {
+            return
+        }
+
+        // libxml decodes all entities; we need to re-encode certain characters
+        // (<, >, and &) when inside XHTML text content.
+        // `&` must be encoded first; otherwise the ampersands introduced by
+        // `&lt;` and `&gt;` would themselves be re-encoded.
+        s = s.replacingOccurrences(of: "&", with: "&amp;")
+        s = s.replacingOccurrences(of: "<", with: "&lt;")
+        s = s.replacingOccurrences(of: ">", with: "&gt;")
+
+        // Append rather than assign: the buffer already holds the tags (and
+        // any earlier text) collected for the current XHTML content.
+        xhtmlString = (xhtmlString ?? "") + s
+    }
+}
diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSArticle.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSArticle.swift
new file mode 100644
index 000000000..0bfe62cbb
--- /dev/null
+++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSArticle.swift
@@ -0,0 +1,111 @@
+//
+//  RSSArticle.swift
+//
+//
+//  Created by Brent Simmons on 8/27/24.
+//
+
+import Foundation
+import FoundationExtras
+
+final class RSSArticle {
+
+    var feedURL: String
+
+    /// An RSS guid, if present, or calculated from other attributes.
+    /// Should be unique to the feed, but not necessarily unique
+    /// across different feeds. (Not suitable for a database ID.)
+    lazy var articleID: String = {
+        if let guid {
+            return guid
+        }
+        return calculatedArticleID()
+    }()
+
+    var guid: String?
+    var title: String?
+    var body: String?
+    var link: String?
+    var permalink: String?
+    var authors: [RSSAuthor]?
+    var enclosures: [RSSEnclosure]?
+    var datePublished: Date?
+    var dateModified: Date?
+    var dateParsed: Date
+    var language: String?
+
+    init(_ feedURL: String) {
+        self.feedURL = feedURL
+        self.dateParsed = Date()
+    }
+
+    func addEnclosure(_ enclosure: RSSEnclosure) {
+
+        if enclosures == nil {
+            enclosures = [RSSEnclosure]()
+        }
+        enclosures!.append(enclosure)
+    }
+
+    func addAuthor(_ author: RSSAuthor) {
+
+        if authors == nil {
+            authors = [RSSAuthor]()
+        }
+        authors!.append(author)
+    }
+}
+
+private extension RSSArticle {
+
+    func calculatedArticleID() -> String {
+
+        // Concatenate a combination of properties when no guid. Then hash the result.
+        // In general, feeds should have guids. When they don't, re-runs are very likely,
+        // because there's no other 100% reliable way to determine identity.
+        // This is intended to create an ID unique inside a feed, but not globally unique.
+ // Not suitable for a database ID, in other words. + + var s = "" + + let datePublishedTimeStampString: String? = { + guard let datePublished else { + return nil + } + return String(format: "%.0f", datePublished.timeIntervalSince1970) + }() + + // Ideally we have a permalink and a pubDate. + // Either one would probably be a good guid, but together they should be rock-solid. + // (In theory. Feeds are buggy, though.) + if let permalink, !permalink.isEmpty, let datePublishedTimeStampString { + s.append(permalink) + s.append(datePublishedTimeStampString) + } + else if let link, !link.isEmpty, let datePublishedTimeStampString { + s.append(link) + s.append(datePublishedTimeStampString) + } + else if let title, !title.isEmpty, let datePublishedTimeStampString { + s.append(title) + s.append(datePublishedTimeStampString) + } + else if let datePublishedTimeStampString { + s.append(datePublishedTimeStampString) + } + else if let permalink, !permalink.isEmpty { + s.append(permalink) + } + else if let link, !link.isEmpty { + s.append(link) + } + else if let title, !title.isEmpty { + s.append(title) + } + else if let body, !body.isEmpty { + s.append(body) + } + + return s.md5String + } +} diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSAuthor.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSAuthor.swift new file mode 100644 index 000000000..297470b85 --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSAuthor.swift @@ -0,0 +1,40 @@ +// +// RSSAuthor.swift +// +// +// Created by Brent Simmons on 8/27/24. +// + +import Foundation + +final class RSSAuthor { + + var name: String? + var url: String? + var avatarURL: String? + var emailAddress: String? + + init(name: String? = nil, url: String? = nil, avatarURL: String? = nil, emailAddress: String? = nil) { + self.name = name + self.url = url + self.avatarURL = avatarURL + self.emailAddress = emailAddress + } + + /// Use when the actual property is unknown. 
Guess based on contents of the string. (This is common with RSS.) + convenience init(singleString: String) { + + if singleString.contains("@") { + self.init(emailAddress: singleString) + } else if singleString.lowercased().hasPrefix("http") { + self.init(url: singleString) + } else { + self.init(name: singleString) + } + } + + func isEmpty() -> Bool { + + name == nil && url == nil && avatarURL == nil && emailAddress == nil + } +} diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSEnclosure.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSEnclosure.swift new file mode 100644 index 000000000..a427475c8 --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSEnclosure.swift @@ -0,0 +1,20 @@ +// +// RSSEnclosure.swift +// +// +// Created by Brent Simmons on 8/27/24. +// + +import Foundation + +final class RSSEnclosure { + + var url: String + var length: Int? + var mimeType: String? + var title: String? + + init(url: String) { + self.url = url + } +} diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeed.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeed.swift new file mode 100644 index 000000000..34a334d3b --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeed.swift @@ -0,0 +1,22 @@ +// +// RSSFeed.swift +// +// +// Created by Brent Simmons on 8/27/24. +// + +import Foundation + +final class RSSFeed { + + var urlString: String + var title: String? + var link: String? + var language: String? + + var articles: [RSSArticle]? 
+ + init(urlString: String) { + self.urlString = urlString + } +} diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeedTransformer.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeedTransformer.swift new file mode 100644 index 000000000..4264a12f5 --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeedTransformer.swift @@ -0,0 +1,75 @@ +// +// RSSFeedTransformer.swift +// RSParser +// +// Created by Brent Simmons on 6/25/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation + +struct RSSFeedTransformer { + + /// Turn an internal RSSFeed into a public ParsedFeed. + static func parsedFeed(with feed: RSSFeed, feedType: FeedType) -> ParsedFeed { + + let items = parsedItems(feed.articles) + return ParsedFeed(type: feedType, title: feed.title, homePageURL: feed.link, feedURL: feed.urlString, language: feed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) + } +} + +private extension RSSFeedTransformer { + + static func parsedItems(_ articles: [RSSArticle]?) 
-> Set<ParsedItem> {
+
+        guard let articles else {
+            return Set()
+        }
+
+        return Set(articles.map(parsedItem))
+    }
+
+    /// Turn an internal RSSArticle into a public ParsedItem.
+    static func parsedItem(_ article: RSSArticle) -> ParsedItem {
+
+        let uniqueID = article.articleID
+        let url = article.permalink
+        let externalURL = article.link
+        let title = article.title
+        let language = article.language
+        let contentHTML = article.body
+        let datePublished = article.datePublished
+        let dateModified = article.dateModified
+        let authors = parsedAuthors(article.authors)
+        let attachments = parsedAttachments(article.enclosures)
+
+        return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: article.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments)
+    }
+
+    /// Returns nil (rather than an empty set) when there are no authors.
+    static func parsedAuthors(_ authors: [RSSAuthor]?) -> Set<ParsedAuthor>? {
+
+        guard let authors = authors, !authors.isEmpty else {
+            return nil
+        }
+
+        let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in
+            return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
+        }
+
+        return transformedAuthors.isEmpty ? nil : Set(transformedAuthors)
+    }
+
+    /// Returns nil (rather than an empty set) when there are no attachments.
+    static func parsedAttachments(_ enclosures: [RSSEnclosure]?) -> Set<ParsedAttachment>? {
+
+        guard let enclosures = enclosures, !enclosures.isEmpty else {
+            return nil
+        }
+
+        let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in
+
+            // A missing or non-positive length is reported as nil.
+            let sizeInBytes = (enclosure.length ?? 0) > 0 ? enclosure.length : nil
+            return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
+        }
+
+        return attachments.isEmpty ? 
nil : Set(attachments) + } +} diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSParser.swift new file mode 100644 index 000000000..75ead7108 --- /dev/null +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSParser.swift @@ -0,0 +1,366 @@ +// +// RSSParser.swift +// RSParser +// +// Created by Brent Simmons on 6/25/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation +import FoundationExtras + +public final class RSSParser { + + private let feedURL: String + private let data: Data + private let feed: RSSFeed + private var articles = [RSSArticle]() + private var currentArticle: RSSArticle? { + articles.last + } + + private var endRSSFound = false + private var isRDF = false + private var parsingArticle = false + private var parsingChannelImage = false + private var parsingAuthor = false + private var currentAttributes: StringDictionary? + + static func parsedFeed(urlString: String, data: Data) -> RSSFeed { + + let parser = RSSParser(urlString: urlString, data: data) + parser.parse() + return parser.feed + } + + init(urlString: String, data: Data) { + self.feedURL = urlString + self.data = data + self.feed = RSSFeed(urlString: urlString) + } +} + +private extension RSSParser { + + func parse() { + + let saxParser = SAXParser(delegate: self, data: data) + saxParser.parse() + feed.articles = articles + } + + private struct XMLName { + static let uppercaseRDF = "RDF".utf8CString + static let item = "item".utf8CString + static let guid = "guid".utf8CString + static let enclosure = "enclosure".utf8CString + static let image = "image".utf8CString + static let author = "author".utf8CString + static let rss = "rss".utf8CString + static let link = "link".utf8CString + static let title = "title".utf8CString + static let language = "language".utf8CString + static let dc = "dc".utf8CString + static let content = "content".utf8CString + static let 
encoded = "encoded".utf8CString
+        static let creator = "creator".utf8CString
+        static let date = "date".utf8CString
+        static let pubDate = "pubDate".utf8CString
+        static let description = "description".utf8CString
+    }
+
+    func addFeedElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {
+
+        guard prefix == nil else {
+            return
+        }
+
+        if SAXEqualTags(localName, XMLName.link) {
+            if feed.link == nil {
+                feed.link = saxParser.currentString
+            }
+        }
+        else if SAXEqualTags(localName, XMLName.title) {
+            feed.title = saxParser.currentString
+        }
+        else if SAXEqualTags(localName, XMLName.language) {
+            feed.language = saxParser.currentString
+        }
+    }
+
+    func addArticle() {
+        let article = RSSArticle(feedURL)
+        articles.append(article)
+    }
+
+    /// Applies a just-closed element inside an `item` to the current article.
+    ///
+    /// Handles `dc:` elements and `content:encoded` first; every other
+    /// prefixed element is ignored.
+    func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {
+
+        guard let currentArticle else {
+            return
+        }
+
+        if let prefix, SAXEqualTags(prefix, XMLName.dc) {
+            addDCElement(saxParser, localName, currentArticle)
+            return
+        }
+
+        if let prefix, SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) {
+            if let currentString = saxParser.currentString, !currentString.isEmpty {
+                currentArticle.body = currentString
+            }
+            return
+        }
+
+        guard prefix == nil else {
+            return
+        }
+
+        // pubDate is parsed from the raw characters and enclosure from its
+        // attributes, so neither may depend on whether currentString is nil.
+        // (Previously both were only reachable when currentString was nil.)
+        if SAXEqualTags(localName, XMLName.pubDate) {
+            currentArticle.datePublished = currentDate(saxParser)
+            return
+        }
+        if SAXEqualTags(localName, XMLName.enclosure) {
+            if let currentAttributes {
+                addEnclosure(currentAttributes, currentArticle)
+            }
+            return
+        }
+
+        guard let currentString = saxParser.currentString else {
+            return
+        }
+
+        if SAXEqualTags(localName, XMLName.guid) {
+            addGuid(currentString, currentArticle)
+        }
+        else if SAXEqualTags(localName, XMLName.author) {
+            addAuthorWithString(currentString, currentArticle)
+        }
+        else if SAXEqualTags(localName, XMLName.link) {
+            currentArticle.link = urlString(currentString)
+        }
+        else if SAXEqualTags(localName, XMLName.description) {
+            if currentArticle.body == nil {
+                currentArticle.body = currentString
+            }
+        }
+        else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) {
+            currentArticle.title = currentString
+        }
+    }
+
+    func addDCElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ currentArticle: RSSArticle) {
+
+        if SAXEqualTags(localName, XMLName.creator) {
+            if let currentString = saxParser.currentString {
+                addAuthorWithString(currentString, currentArticle)
+            }
+        }
+        else if SAXEqualTags(localName, XMLName.date) {
+            currentArticle.datePublished = currentDate(saxParser)
+        }
+    }
+
+    static let isPermalinkKey = "isPermaLink"
+    static let isPermalinkLowercaseKey = "ispermalink"
+    static let falseValue = "false"
+
+    /// Stores the guid and, unless `isPermaLink` is explicitly "false",
+    /// also uses it as the permalink when it looks like a URL or path.
+    func addGuid(_ guid: String, _ currentArticle: RSSArticle) {
+
+        currentArticle.guid = guid
+
+        // No attributes at all means no `isPermaLink` attribute, which falls
+        // through to the default (true) below instead of returning early.
+        let isPermaLinkValue: String? = {
+
+            guard let currentAttributes else {
+                return nil
+            }
+            if let value = currentAttributes[Self.isPermalinkKey] {
+                return value
+            }
+            // Allow for `ispermalink`, `isPermalink`, etc.
+            for (key, value) in currentAttributes {
+                if key.lowercased() == Self.isPermalinkLowercaseKey {
+                    return value
+                }
+            }
+
+            return nil
+        }()
+
+        // Spec: `isPermaLink is optional, its default value is true.`
+        // https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
+        // Return only if non-nil and equal to false — otherwise it’s a permalink.
+        if let isPermaLinkValue, isPermaLinkValue == Self.falseValue {
+            return
+        }
+
+        // Feed bug found in the wild: using a guid that’s not really a permalink
+        // and not realizing that `isPermaLink` is true by default.
+        if stringIsProbablyAURLOrRelativePath(guid) {
+            currentArticle.permalink = urlString(guid)
+        }
+    }
+
+    func stringIsProbablyAURLOrRelativePath(_ s: String) -> Bool {
+
+        // The RSS guid is defined as a permalink, except when it appears like this:
+        // `<guid isPermaLink="false">some—identifier</guid>`
+        // However, people often seem to think it’s *not* a permalink by default, even
+        // though it is. 
So we try to detect the situation where the value is not a URL string, + // and not even a relative path. This may need to evolve over time. + + if !s.contains("/") { + // This seems to be just about the best possible check. + // Bad guids are often just integers, for instance. + return false + } + + if s.lowercased().hasPrefix("tag:") { + // A common non-URL guid form starts with `tag:`. + return false + } + + return true + } + + /// Do best attempt at turning a string into a URL string. + /// + /// If it already appears to be a URL, return it. + /// Otherwise, treat it like a relative URL and resolve using + /// the URL of the home page of the feed (if available) + /// or the URL of the feed. + /// + /// The returned value is not guaranteed to be a valid URL string. + /// It’s a best attempt without going to heroic lengths. + func urlString(_ s: String) -> String { + + if s.lowercased().hasPrefix("http") { + return s + } + + let baseURLString = feed.link ?? feedURL + guard let baseURL = URL(string: baseURLString) else { + return s + } + guard let resolvedURL = URL(string: s, relativeTo: baseURL) else { + return s + } + + return resolvedURL.absoluteString + } + + func addAuthorWithString(_ authorString: String, _ currentArticle: RSSArticle) { + + if authorString.isEmpty { + return + } + + let author = RSSAuthor(singleString: authorString) + currentArticle.addAuthor(author) + } + + private struct EnclosureKey { + static let url = "url" + static let length = "length" + static let type = "type" + } + + func addEnclosure(_ attributes: StringDictionary, _ currentArticle: RSSArticle) { + + guard let url = attributes[EnclosureKey.url], !url.isEmpty else { + return + } + + let enclosure = RSSEnclosure(url: url) + if let lengthValue = attributes[EnclosureKey.length], let length = Int(lengthValue) { + enclosure.length = length + } + enclosure.mimeType = attributes[EnclosureKey.type] + + currentArticle.addEnclosure(enclosure) + } + + func currentDate(_ saxParser: SAXParser) 
-> Date? { + + guard let data = saxParser.currentCharacters else { + return nil + } + return DateParser.date(data: data) + } +} + +extension RSSParser: SAXParserDelegate { + + static let rdfAbout = "rdf:about" + + public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { + + if endRSSFound { + return + } + + if SAXEqualTags(localName, XMLName.uppercaseRDF) { + isRDF = true + return + } + + var xmlAttributes: StringDictionary? = nil + if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(localName, XMLName.enclosure) { + xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) + } + if currentAttributes != xmlAttributes { + currentAttributes = xmlAttributes + } + + if prefix == nil && SAXEqualTags(localName, XMLName.item) { + addArticle() + parsingArticle = true + + if isRDF, let rdfGuid = xmlAttributes?[Self.rdfAbout], let currentArticle { // RSS 1.0 guid + currentArticle.guid = rdfGuid + currentArticle.permalink = rdfGuid + } + } + else if prefix == nil && SAXEqualTags(localName, XMLName.image) { + parsingChannelImage = true + } + else if prefix == nil && SAXEqualTags(localName, XMLName.author) { + if parsingArticle { + parsingAuthor = true + } + } + + if !parsingChannelImage { + saxParser.beginStoringCharacters() + } + } + + public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) 
{ + + if endRSSFound { + return + } + + if isRDF && SAXEqualTags(localName, XMLName.uppercaseRDF) { + endRSSFound = true + } + else if SAXEqualTags(localName, XMLName.rss) { + endRSSFound = true + } + else if SAXEqualTags(localName, XMLName.image) { + parsingChannelImage = false + } + else if SAXEqualTags(localName, XMLName.item) { + parsingArticle = false + } + else if parsingArticle { + addArticleElement(saxParser, localName, prefix) + if SAXEqualTags(localName, XMLName.author) { + parsingAuthor = false + } + } + else if !parsingChannelImage { + addFeedElement(saxParser, localName, prefix) + } + } + + public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { + + // Required method. + } +} + diff --git a/Modules/Parser/Sources/Parser/JSON/JSONTypes.swift b/Modules/Parser/Sources/Parser/FeedParser/JSON/JSONTypes.swift similarity index 100% rename from Modules/Parser/Sources/Parser/JSON/JSONTypes.swift rename to Modules/Parser/Sources/Parser/FeedParser/JSON/JSONTypes.swift diff --git a/Modules/Parser/Sources/Parser/JSON/JSONUtilities.swift b/Modules/Parser/Sources/Parser/FeedParser/JSON/JSONUtilities.swift similarity index 100% rename from Modules/Parser/Sources/Parser/JSON/JSONUtilities.swift rename to Modules/Parser/Sources/Parser/FeedParser/JSON/JSONUtilities.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift b/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift deleted file mode 100644 index 4c5eb3338..000000000 --- a/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift +++ /dev/null @@ -1,76 +0,0 @@ -// -// FeedParser.swift -// RSParser -// -// Created by Brent Simmons on 6/20/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -import Foundation -import ParserObjC - -// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. -// You don’t need to know the type of feed. 
- -public struct FeedParser { - - public static func canParse(_ parserData: ParserData) -> Bool { - - let type = feedType(parserData) - - switch type { - case .jsonFeed, .rssInJSON, .rss, .atom: - return true - default: - return false - } - } - - public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? { - - let type = feedType(parserData) - - switch type { - - case .jsonFeed: - return try JSONFeedParser.parse(parserData) - - case .rssInJSON: - return try RSSInJSONParser.parse(parserData) - - case .rss: - return RSSParser.parse(parserData) - - case .atom: - return AtomParser.parse(parserData) - - case .unknown, .notAFeed: - return nil - } - } - - /// For unit tests measuring performance. - public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? { - - let type = feedType(parserData) - - switch type { - - case .jsonFeed: - return try JSONFeedParser.parse(parserData) - - case .rssInJSON: - return try RSSInJSONParser.parse(parserData) - - case .rss: - return RSSParser.parse(parserData) - - case .atom: - return AtomParser.parse(parserData) - - case .unknown, .notAFeed: - return nil - } - } - -} diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedType.swift b/Modules/Parser/Sources/Parser/Feeds/FeedType.swift deleted file mode 100644 index 6638b6543..000000000 --- a/Modules/Parser/Sources/Parser/Feeds/FeedType.swift +++ /dev/null @@ -1,64 +0,0 @@ -// -// FeedType.swift -// RSParser -// -// Created by Brent Simmons on 6/20/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -import Foundation -#if SWIFT_PACKAGE -import ParserObjC -#endif - -public enum FeedType: Sendable { - case rss - case atom - case jsonFeed - case rssInJSON - case unknown - case notAFeed -} - - -private let minNumberOfBytesRequired = 128 - -public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType { - - // Can call with partial data — while still downloading, for instance. 
- // If there’s not enough data, return .unknown. Ask again when there’s more data. - // If it’s definitely not a feed, return .notAFeed. - // - // This is fast enough to call on the main thread. - - if parserData.data.count < minNumberOfBytesRequired { - return .unknown - } - - let nsdata = parserData.data as NSData - - if nsdata.isProbablyJSONFeed() { - return .jsonFeed - } - if nsdata.isProbablyRSSInJSON() { - return .rssInJSON - } - if nsdata.isProbablyRSS() { - return .rss - } - if nsdata.isProbablyAtom() { - return .atom - } - - if isPartialData && nsdata.isProbablyJSON() { - // Might not be able to detect a JSON Feed without all data. - // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) - // has, at this writing, the JSON version element at the end of the feed, - // which is totally legal — but it means not being able to detect - // that it’s a JSON Feed without all the data. - // So this returns .unknown instead of .notAFeed. - return .unknown - } - - return .notAFeed -} diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedHub.swift b/Modules/Parser/Sources/Parser/Feeds/ParsedHub.swift deleted file mode 100644 index a1e95e7e2..000000000 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedHub.swift +++ /dev/null @@ -1,15 +0,0 @@ -// -// ParsedHub.swift -// RSParser -// -// Created by Brent Simmons on 6/20/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -import Foundation - -public struct ParsedHub: Hashable, Sendable { - - public let type: String - public let url: String -} diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift deleted file mode 100644 index 93e01dcd3..000000000 --- a/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift +++ /dev/null @@ -1,32 +0,0 @@ -// -// AtomParser.swift -// RSParser -// -// Created by Brent Simmons on 6/25/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -import Foundation - -#if SWIFT_PACKAGE -import ParserObjC -#endif - -// RSSParser wraps the Objective-C RSAtomParser. -// -// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc. -// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates -// the same things that JSONFeedParser and RSSInJSONParser create. -// -// In general, you should see FeedParser.swift for all your feed-parsing needs. - -public struct AtomParser { - - public static func parse(_ parserData: ParserData) -> ParsedFeed? { - - if let rsParsedFeed = RSAtomParser.parseFeed(with: parserData) { - return RSParsedFeedTransformer.parsedFeed(rsParsedFeed) - } - return nil - } -} diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift b/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift deleted file mode 100644 index 27a5772c3..000000000 --- a/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift +++ /dev/null @@ -1,80 +0,0 @@ -// -// RSParsedFeedTransformer.swift -// RSParser -// -// Created by Brent Simmons on 6/25/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -import Foundation -#if SWIFT_PACKAGE -import ParserObjC -#endif - -// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago. -// They create an RSParsedFeed object and related Objective-C objects. -// These functions take an RSParsedFeed and return a Swift-y ParsedFeed, -// which is part of providing a single API for feed parsing. 
- -struct RSParsedFeedTransformer { - - static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed { - - let items = parsedItems(rsParsedFeed.articles) - return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) - } -} - -private extension RSParsedFeedTransformer { - - static func parsedItems(_ parsedArticles: Set) -> Set { - - // Create Set from Set - - return Set(parsedArticles.map(parsedItem)) - } - - static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem { - - let uniqueID = parsedArticle.articleID - let url = parsedArticle.permalink - let externalURL = parsedArticle.link - let title = parsedArticle.title - let language = parsedArticle.language - let contentHTML = parsedArticle.body - let datePublished = parsedArticle.datePublished - let dateModified = parsedArticle.dateModified - let authors = parsedAuthors(parsedArticle.authors) - let attachments = parsedAttachments(parsedArticle.enclosures) - - return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments) - } - - static func parsedAuthors(_ authors: Set?) -> Set? { - - guard let authors = authors, !authors.isEmpty else { - return nil - } - - let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in - return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress) - } - - return transformedAuthors.isEmpty ? nil : Set(transformedAuthors) - } - - static func parsedAttachments(_ enclosures: Set?) -> Set? 
{ - - guard let enclosures = enclosures, !enclosures.isEmpty else { - return nil - } - - let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in - - let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil - return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil) - } - - return attachments.isEmpty ? nil : Set(attachments) - } -} diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift deleted file mode 100644 index 85b88d83f..000000000 --- a/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift +++ /dev/null @@ -1,29 +0,0 @@ -// -// RSSParser.swift -// RSParser -// -// Created by Brent Simmons on 6/25/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -import Foundation -import ParserObjC - -// RSSParser wraps the Objective-C RSRSSParser. -// -// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc. -// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates -// the same things that JSONFeedParser and RSSInJSONParser create. -// -// In general, you should see FeedParser.swift for all your feed-parsing needs. - -public struct RSSParser { - - public static func parse(_ parserData: ParserData) -> ParsedFeed? { - - if let rsParsedFeed = RSRSSParser.parseFeed(with: parserData) { - return RSParsedFeedTransformer.parsedFeed(rsParsedFeed) - } - return nil - } -} diff --git a/Modules/Parser/Sources/Parser/HTMLParser/HTMLEntityDecoder.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLEntityDecoder.swift new file mode 100644 index 000000000..15fd31e29 --- /dev/null +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLEntityDecoder.swift @@ -0,0 +1,349 @@ +// +// HTMLEntityDecoder.swift +// +// +// Created by Brent Simmons on 9/14/24. 
+// + +import Foundation + +public final class HTMLEntityDecoder { + + public static func decodedString(_ encodedString: String) -> String { + + let scanner = EntityScanner(string: encodedString) + var result = "" + var didDecodeAtLeastOneEntity = false + + while true { + + let scannedString = scanner.scanUpToAmpersand() + if !scannedString.isEmpty { + result.append(scannedString) + } + if scanner.isAtEnd { + break + } + + let savedScanLocation = scanner.scanLocation + + if let decodedEntity = scanner.scanEntityValue() { + result.append(decodedEntity) + didDecodeAtLeastOneEntity = true + } + else { + result.append("&") + scanner.scanLocation = savedScanLocation + 1 + } + + if scanner.isAtEnd { + break + } + } + + if !didDecodeAtLeastOneEntity { // No entities decoded? + return encodedString + } + return result + } +} + +/// Purpose-built version of NSScanner, which has deprecated the parts we want to use. +final class EntityScanner { + + let string: String + let count: Int + var scanLocation = 0 + + var isAtEnd: Bool { + scanLocation >= count + } + + var currentCharacter: Character? { + guard !isAtEnd else { + return nil + } + return string.characterAtIntIndex(scanLocation) + } + + init(string: String) { + self.string = string + self.count = string.count + } + + static let ampersandCharacter = Character("&") + + /// Scans up to `characterToFind` and returns the characters up to (and not including) `characterToFind`. + /// - Returns: the scanned portion before `characterToFind`. May be empty string. + func scanUpToAmpersand() -> String { + + let characterToFind = Self.ampersandCharacter + var scanned = "" + + while true { + + guard let ch = currentCharacter else { + break + } + scanLocation += 1 + + if ch == characterToFind { + break + } + else { + scanned.append(ch) + } + } + + return scanned + } + + static let semicolonCharacter = Character(";") + + func scanEntityValue() -> String? 
{ + + let initialScanLocation = scanLocation + let maxEntityLength = 20 // It’s probably smaller, but this is just for sanity. + + while true { + + guard let ch = currentCharacter else { + break + } + if CharacterSet.whitespacesAndNewlines.contains(ch.unicodeScalars.first!) { + break + } + + if ch == Self.semicolonCharacter { + let entityRange = initialScanLocation.. maxEntityLength { + break + } + if isAtEnd { + break + } + } + + return nil + } +} + +extension String { + + func indexForInt(_ i: Int) -> Index? { + + index(startIndex, offsetBy: i, limitedBy: endIndex) + } + + func characterAtIntIndex(_ i: Int) -> Character? { + + guard let index = indexForInt(i) else { + return nil + } + + return self[index] + } + + func substring(intRange: Range) -> String? { + + guard let rangeLower = indexForInt(intRange.lowerBound) else { + return nil + } + guard let rangeUpper = indexForInt(intRange.upperBound) else { + return nil + } + + return String(self[rangeLower.. String? { + + var s = rawEntity + + if s.hasPrefix("&") { + s.removeFirst() + } + if s.hasSuffix(";") { + s.removeLast() + } + + if let decodedEntity = entitiesDictionary[s] { + return decodedEntity + } + + if s.hasPrefix("#x") || s.hasPrefix("#X") { // Hex + let scanner = Scanner(string: s) + scanner.charactersToBeSkipped = CharacterSet(charactersIn: "#xX") + var hexValue: UInt64 = 0 + if scanner.scanHexInt64(&hexValue) { + return stringWithValue(UInt32(hexValue)) + } + return nil + } + + else if s.hasPrefix("#") { + s.removeFirst() + guard let value = UInt32(s), value >= 1 else { + return nil + } + return stringWithValue(value) + } + + return nil +} + +private func stringWithValue(_ value: UInt32) -> String? 
{ + + // From WebCore's HTMLEntityParser + let windowsLatin1ExtensionArray: [UInt32] = [ + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F + ] + + var modifiedValue = value + + if (modifiedValue & ~0x1F) == 0x80 { // value >= 128 && value < 160 + modifiedValue = windowsLatin1ExtensionArray[Int(modifiedValue - 0x80)] + } + + modifiedValue = CFSwapInt32HostToLittle(modifiedValue) + + let data = Data(bytes: &modifiedValue, count: MemoryLayout.size(ofValue: modifiedValue)) + + return String(data: data, encoding: .utf32LittleEndian) +} + +private let entitiesDictionary = + [ + "AElig": "Æ", + "Aacute": "Á", + "Acirc": "Â", + "Agrave": "À", + "Aring": "Å", + "Atilde": "Ã", + "Auml": "Ä", + "Ccedil": "Ç", + "Dstrok": "Ð", + "ETH": "Ð", + "Eacute": "É", + "Ecirc": "Ê", + "Egrave": "È", + "Euml": "Ë", + "Iacute": "Í", + "Icirc": "Î", + "Igrave": "Ì", + "Iuml": "Ï", + "Ntilde": "Ñ", + "Oacute": "Ó", + "Ocirc": "Ô", + "Ograve": "Ò", + "Oslash": "Ø", + "Otilde": "Õ", + "Ouml": "Ö", + "Pi": "Π", + "THORN": "Þ", + "Uacute": "Ú", + "Ucirc": "Û", + "Ugrave": "Ù", + "Uuml": "Ü", + "Yacute": "Y", + "aacute": "á", + "acirc": "â", + "acute": "´", + "aelig": "æ", + "agrave": "à", + "amp": "&", + "apos": "'", + "aring": "å", + "atilde": "ã", + "auml": "ä", + "brkbar": "¦", + "brvbar": "¦", + "ccedil": "ç", + "cedil": "¸", + "cent": "¢", + "copy": "©", + "curren": "¤", + "deg": "°", + "die": "¨", + "divide": "÷", + "eacute": "é", + "ecirc": "ê", + "egrave": "è", + "eth": "ð", + "euml": "ë", + "euro": "€", + "frac12": "½", + "frac14": "¼", + "frac34": "¾", + "gt": ">", + "hearts": "♥", + "hellip": "…", + "iacute": "í", + "icirc": "î", + "iexcl": "¡", + "igrave": "ì", + "iquest": "¿", + "iuml": "ï", + "laquo": "«", + "ldquo": "“", + "lsquo": 
"‘", + "lt": "<", + "macr": "¯", + "mdash": "—", + "micro": "µ", + "middot": "·", + "ndash": "–", + "not": "¬", + "ntilde": "ñ", + "oacute": "ó", + "ocirc": "ô", + "ograve": "ò", + "ordf": "ª", + "ordm": "º", + "oslash": "ø", + "otilde": "õ", + "ouml": "ö", + "para": "¶", + "pi": "π", + "plusmn": "±", + "pound": "£", + "quot": "\"", + "raquo": "»", + "rdquo": "”", + "reg": "®", + "rsquo": "’", + "sect": "§", + "shy": stringWithValue(173), + "sup1": "¹", + "sup2": "²", + "sup3": "³", + "szlig": "ß", + "thorn": "þ", + "times": "×", + "trade": "™", + "uacute": "ú", + "ucirc": "û", + "ugrave": "ù", + "uml": "¨", + "uuml": "ü", + "yacute": "y", + "yen": "¥", + "yuml": "ÿ", + "infin": "∞", + "nbsp": stringWithValue(160) + ] diff --git a/Modules/Parser/Sources/Parser/HTMLParser/HTMLLink.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLLink.swift new file mode 100644 index 000000000..a07039719 --- /dev/null +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLLink.swift @@ -0,0 +1,22 @@ +// +// HTMLLink.swift +// +// +// Created by Brent Simmons on 9/21/24. +// + +import Foundation + +public final class HTMLLink { + + public var urlString: String? // Absolute URL string + public var text: String? + public var title: String? // Title attribute inside anchor tag + + init(urlString: String? = nil, text: String? = nil, title: String? = nil) { + + self.urlString = urlString + self.text = text + self.title = title + } +} diff --git a/Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift new file mode 100644 index 000000000..d084860d4 --- /dev/null +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift @@ -0,0 +1,118 @@ +// +// HTMLLinkParser.swift +// +// +// Created by Brent Simmons on 9/21/24. 
+// + +import Foundation +import FoundationExtras + +public final class HTMLLinkParser { + + public private(set) var links = [HTMLLink]() + + private let parserData: ParserData + private let baseURL: URL? + + public static func htmlLinks(with parserData: ParserData) -> [HTMLLink] { + + let parser = HTMLLinkParser(parserData) + parser.parse() + return parser.links + } + + init(_ parserData: ParserData) { + + self.parserData = parserData + self.baseURL = URL(string: parserData.url) + } +} + +private extension HTMLLinkParser { + + func parse() { + + let htmlParser = SAXHTMLParser(delegate: self, data: parserData.data) + htmlParser.parse() + } +} + +extension HTMLLinkParser: SAXHTMLParserDelegate { + + private var currentLink: HTMLLink? { + links.last + } + + private struct HTMLAttributeName { + static let href = "href" + static let title = "title" + } + + private func title(with attributesDictionary: StringDictionary) -> String? { + + attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.title) + } + + private func urlString(with attributesDictionary: StringDictionary) -> String? { + + guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href), !href.isEmpty else { + return nil + } + + guard let baseURL, let absoluteURL = URL(string: href, relativeTo: baseURL) else { + assertionFailure("Expected to create URL") + return nil + } + + return absoluteURL.absoluteString + } + + private func handleLinkAttributes(_ attributesDictionary: StringDictionary) { + + guard let currentLink else { + assertionFailure("currentLink must not be nil") + return + } + + currentLink.urlString = urlString(with: attributesDictionary) + currentLink.title = title(with: attributesDictionary) + } + + private struct HTMLName { + static let a = "a".utf8CString + } + + public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) 
{ + + guard SAXEqualTags(name, HTMLName.a) else { + return + } + + let link = HTMLLink() + links.append(link) + + if let attributesDictionary = saxHTMLParser.attributesDictionary(attributes) { + handleLinkAttributes(attributesDictionary) + } + + saxHTMLParser.beginStoringCharacters() + } + + public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) { + + guard SAXEqualTags(name, HTMLName.a) else { + return + } + guard let currentLink else { + assertionFailure("currentLink must not be nil.") + return + } + + currentLink.text = saxHTMLParser.currentStringWithTrimmedWhitespace + } + + public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) { + // Nothing needed. + } +} diff --git a/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift new file mode 100644 index 000000000..651fd7a58 --- /dev/null +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift @@ -0,0 +1,437 @@ +// +// HTMLMetadata.swift +// +// +// Created by Brent Simmons on 9/22/24. +// + +import Foundation + +public final class HTMLMetadata { + + public let baseURLString: String + public let tags: [HTMLTag] + public let favicons: [HTMLMetadataFavicon]? + public let appleTouchIcons: [HTMLMetadataAppleTouchIcon]? + public let feedLinks: [HTMLMetadataFeedLink]? + public let openGraphProperties: HTMLOpenGraphProperties? + public let twitterProperties: HTMLTwitterProperties? 
+ + init(_ urlString: String, _ tags: [HTMLTag]) { + + self.baseURLString = urlString + self.tags = tags + + self.favicons = Self.resolvedFaviconLinks(urlString, tags) + + if let appleTouchIconTags = Self.appleTouchIconTags(tags) { + self.appleTouchIcons = appleTouchIconTags.map { htmlTag in + HTMLMetadataAppleTouchIcon(urlString, htmlTag) + } + } + else { + self.appleTouchIcons = nil + } + + if let feedLinkTags = Self.feedLinkTags(tags) { + self.feedLinks = feedLinkTags.map { htmlTag in + HTMLMetadataFeedLink(urlString, htmlTag) + } + } + else { + self.feedLinks = nil + } + + self.openGraphProperties = HTMLOpenGraphProperties(urlString, tags) + self.twitterProperties = HTMLTwitterProperties(urlString, tags) + } + + static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? { + + guard let linkTags = linkTagsWithMatchingRel("icon", tags) else { + return nil + } + + var seenHrefs = [String]() + + let favicons: [HTMLMetadataFavicon] = linkTags.compactMap { htmlTag in + + let favicon = HTMLMetadataFavicon(baseURLString, htmlTag) + guard let urlString = favicon.urlString else { + return nil + } + guard !seenHrefs.contains(urlString) else { + return nil + } + seenHrefs.append(urlString) + return favicon + } + + return favicons.isEmpty ? nil : favicons + } + + static func appleTouchIconTags(_ tags: [HTMLTag]) -> [HTMLTag]? { + + guard let linkTags = linkTags(tags) else { + return nil + } + + guard let appleTouchIconTags = tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], linkTags) else { + return nil + } + return appleTouchIconTags.isEmpty ? nil : appleTouchIconTags + } + + static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? 
{ + + guard let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else { + return nil + } + + let feedLinkTags = alternateLinkTags.filter { tag in + + guard let attributes = tag.attributes, let type = attributes.object(forCaseInsensitiveKey: "type"), typeIsFeedType(type) else { + return false + } + guard let urlString = urlString(from: attributes), !urlString.isEmpty else { + return false + } + + return true + } + + return feedLinkTags.isEmpty ? nil : feedLinkTags + } + + static func typeIsFeedType(_ type: String) -> Bool { + + let lowerType = type.lowercased() + return lowerType.hasSuffix("/rss+xml") || lowerType.hasSuffix("/atom+xml") || lowerType.hasSuffix("/json") + } + + static func linkTags(_ tags: [HTMLTag]) -> [HTMLTag]? { + + let linkTags = tags.filter { $0.tagType == .link } + return linkTags.isEmpty ? nil : linkTags + } + + static func linkTagsWithMatchingRel(_ valueToMatch: String, _ tags: [HTMLTag]) -> [HTMLTag]? { + + // Case-insensitive; matches a whitespace-delimited word + + guard let linkTags = linkTags(tags) else { + return nil + } + + let tagsWithURLString = linkTags.filter { tag in + guard let attributes = tag.attributes else { + return false + } + guard let urlString = urlString(from: attributes), !urlString.isEmpty else { + return false + } + return true + } + if tagsWithURLString.isEmpty { + return nil + } + + guard let matchingTags = tagsMatchingRelValues([valueToMatch], tagsWithURLString) else { + return nil + } + return matchingTags.isEmpty ? nil : matchingTags + } + + static func tagsMatchingRelValues(_ valuesToMatch: [String], _ tags: [HTMLTag]) -> [HTMLTag]? 
{ + + let lowerValuesToMatch = valuesToMatch.map { $0.lowercased() } + + let matchingTags: [HTMLTag] = { + + tags.filter { tag in + + guard let attributes = tag.attributes else { + return false + } + guard let relValue = relValue(from: attributes) else { + return false + } + + let relValues = relValue.components(separatedBy: .whitespacesAndNewlines) + for oneRelValue in relValues { + let oneLowerRelValue = oneRelValue.lowercased() + + for lowerValueToMatch in lowerValuesToMatch { + if lowerValueToMatch == oneLowerRelValue { + return true + } + } + } + + return false + } + }() + + return matchingTags.isEmpty ? nil : matchingTags + } +} + +public final class HTMLMetadataAppleTouchIcon { + + public let rel: String? + public let sizes: String? + public let size: CGSize? + public let urlString: String? // Absolute + + init(_ urlString: String, _ tag: HTMLTag) { + + guard let attributes = tag.attributes else { + self.rel = nil + self.sizes = nil + self.size = nil + self.urlString = nil + return + } + + self.rel = attributes.object(forCaseInsensitiveKey: "rel") + self.urlString = absoluteURLString(from: attributes, baseURL: urlString) + + guard let sizes = attributes.object(forCaseInsensitiveKey: "sizes") else { + self.sizes = nil + self.size = nil + return + } + self.sizes = sizes + + let sizeComponents = sizes.components(separatedBy: CharacterSet(charactersIn: "x")) + if sizeComponents.count == 2, let width = Double(sizeComponents[0]), let height = Double(sizeComponents[1]) { + self.size = CGSize(width: width, height: height) + } + else { + self.size = nil + } + } +} + +public final class HTMLMetadataFeedLink { + + public let title: String? + public let type: String? + public let urlString: String? 
// Absolute + + init(_ urlString: String, _ tag: HTMLTag) { + + guard let attributes = tag.attributes else { + self.title = nil + self.type = nil + self.urlString = nil + return + } + + self.urlString = absoluteURLString(from: attributes, baseURL: urlString) + self.title = attributes.object(forCaseInsensitiveKey: "title") + self.type = attributes.object(forCaseInsensitiveKey: "type") + } +} + +public final class HTMLMetadataFavicon { + + public let type: String? + public let urlString: String? + + init(_ urlString: String, _ tag: HTMLTag) { + + guard let attributes = tag.attributes else { + self.type = nil + self.urlString = nil + return + } + + self.urlString = absoluteURLString(from: attributes, baseURL: urlString) + self.type = attributes.object(forCaseInsensitiveKey: "type") + } +} + +public final class HTMLOpenGraphProperties { + + // TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image. + // See http://ogp.me/ + + public let image: HTMLOpenGraphImage? + + init(_ urlString: String, _ tags: [HTMLTag]) { + + self.image = Self.parse(tags) + } +} + +private extension HTMLOpenGraphProperties { + + private static let ogPrefix = "og:" + + struct OGKey { + static let property = "property" + static let content = "content" + } + + struct OGValue { + static let ogImage = "og:image" + static let ogImageURL = "og:image:url" + static let ogImageSecureURL = "og:image:secure_url" + static let ogImageType = "og:image:type" + static let ogImageAlt = "og:image:alt" + static let ogImageWidth = "og:image:width" + static let ogImageHeight = "og:image:height" + } + + static func parse(_ tags: [HTMLTag]) -> HTMLOpenGraphImage? { + + let metaTags = tags.filter { $0.tagType == .meta } + if metaTags.isEmpty { + return nil + } + + // HTMLOpenGraphImage properties to fill in. + var url: String? + var secureURL: String? + var mimeType: String? + var width: CGFloat? + var height: CGFloat? + var altText: String? 
+ + for tag in metaTags { + + guard let attributes = tag.attributes else { + continue + } + guard let propertyName = attributes[OGKey.property], propertyName.hasPrefix(ogPrefix) else { + continue + } + guard let content = attributes[OGKey.content] else { + continue + } + + if propertyName == OGValue.ogImage { + url = content + } + else if propertyName == OGValue.ogImageURL { + url = content + } + else if propertyName == OGValue.ogImageSecureURL { + secureURL = content + } + else if propertyName == OGValue.ogImageType { + mimeType = content + } + else if propertyName == OGValue.ogImageAlt { + altText = content + } + else if propertyName == OGValue.ogImageWidth { + if let value = Double(content) { + width = CGFloat(value) + } + } + else if propertyName == OGValue.ogImageHeight { + if let value = Double(content) { + height = CGFloat(value) + } + } + } + + if url == nil && secureURL == nil && mimeType == nil && width == nil && height == nil && altText == nil { + return nil + } + + return HTMLOpenGraphImage(url: url, secureURL: secureURL, mimeType: mimeType, width: width, height: height, altText: altText) + } +} + +public final class HTMLOpenGraphImage { + + public let url : String? + public let secureURL: String? + public let mimeType: String? + public let width: CGFloat? + public let height: CGFloat? + public let altText: String? + + init(url: String?, secureURL: String?, mimeType: String?, width: CGFloat?, height: CGFloat?, altText: String?) { + + self.url = url + self.secureURL = secureURL + self.mimeType = mimeType + self.width = width + self.height = height + self.altText = altText + } +} + +public final class HTMLTwitterProperties { + + public let imageURL: String? // twitter:image:src + + private struct TwitterKey { + static let name = "name" + static let content = "content" + } + + private struct TwitterValue { + static let imageSrc = "twitter:image:src" + } + + init(_ urlString: String, _ tags: [HTMLTag]) { + + let imageURL: String? 
= { + for tag in tags { + guard tag.tagType == .meta else { + continue + } + guard let name = tag.attributes?[TwitterKey.name], name == TwitterValue.imageSrc else { + continue + } + guard let content = tag.attributes?[TwitterKey.content], !content.isEmpty else { + continue + } + return content + } + + return nil + }() + + self.imageURL = imageURL + } +} + +private func urlString(from attributes: HTMLTagAttributes) -> String? { + + if let urlString = attributes.object(forCaseInsensitiveKey: "href") { + return urlString + } + return attributes.object(forCaseInsensitiveKey: "src") +} + +private func relValue(from attributes: HTMLTagAttributes) -> String? { + + attributes.object(forCaseInsensitiveKey: "rel") +} + +private func absoluteURLString(from attributes: HTMLTagAttributes, baseURL: String) -> String? { + + guard let urlString = urlString(from: attributes), !urlString.isEmpty else { + return nil + } + + return absoluteURLStringWithRelativeURLString(urlString, baseURLString: baseURL) +} + +private func absoluteURLStringWithRelativeURLString(_ relativeURLString: String, baseURLString: String) -> String? { + + guard let baseURL = URL(string: baseURLString) else { + return nil + } + guard let absoluteURL = URL(string: relativeURLString, relativeTo: baseURL) else { + return nil + } + return absoluteURL.absoluteURL.standardized.absoluteString +} + diff --git a/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadataParser.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadataParser.swift new file mode 100644 index 000000000..fe317067a --- /dev/null +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadataParser.swift @@ -0,0 +1,102 @@ +// +// HTMLMetadataParser.swift +// +// +// Created by Brent Simmons on 9/22/24. 
+// + +import Foundation +import FoundationExtras + +public final class HTMLMetadataParser { + + private var tags = [HTMLTag]() + + public static func metadata(with parserData: ParserData) -> HTMLMetadata { + + HTMLMetadataParser().parse(parserData) + } +} + +private extension HTMLMetadataParser { + + func parse(_ parserData: ParserData) -> HTMLMetadata { + + tags = [HTMLTag]() + + let htmlParser = SAXHTMLParser(delegate: self, data: parserData.data) + htmlParser.parse() + + return HTMLMetadata(parserData.url, tags) + } +} + +extension HTMLMetadataParser: SAXHTMLParserDelegate { + + private struct HTMLName { + + static let link = "link".utf8CString + static let meta = "meta".utf8CString + } + + private struct HTMLKey { + + static let href = "href" + static let src = "src" + static let rel = "rel" + } + + private func link(with attributes: StringDictionary) -> String? { + + if let link = attributes.object(forCaseInsensitiveKey: HTMLKey.href) { + return link + } + + return attributes.object(forCaseInsensitiveKey: HTMLKey.src) + } + + private func handleLinkAttributes(_ attributes: StringDictionary) { + + guard let rel = attributes.object(forCaseInsensitiveKey: HTMLKey.rel), !rel.isEmpty else { + return + } + guard let link = link(with: attributes), !link.isEmpty else { + return + } + + let tag = HTMLTag(tagType: .link, attributes: attributes) + tags.append(tag) + } + + private func handleMetaAttributes(_ attributes: StringDictionary) { + + let tag = HTMLTag(tagType: .meta, attributes: attributes) + tags.append(tag) + } + + public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) 
{ + + if SAXEqualTags(name, HTMLName.link) { + let d = saxHTMLParser.attributesDictionary(attributes) + if let d, !d.isEmpty { + handleLinkAttributes(d) + } + } + else if SAXEqualTags(name, HTMLName.meta) { + let d = saxHTMLParser.attributesDictionary(attributes) + if let d, !d.isEmpty { + handleMetaAttributes(d) + } + } + } + + public func saxHTMLParser(_: SAXHTMLParser, endElement: XMLPointer) { + + // Nothing to do + } + + public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) { + + // Nothing to do + } +} diff --git a/Modules/Parser/Sources/Parser/HTMLParser/HTMLTag.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLTag.swift new file mode 100644 index 000000000..e0bcfad5e --- /dev/null +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLTag.swift @@ -0,0 +1,26 @@ +// +// HTMLTag.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public typealias HTMLTagAttributes = [String: String] + +public struct HTMLTag: Sendable { + + public enum TagType: Sendable { + case link + case meta + } + + public let tagType: TagType + public let attributes: HTMLTagAttributes? + + public init(tagType: TagType, attributes: HTMLTagAttributes?) { + self.tagType = tagType + self.attributes = attributes + } +} diff --git a/Modules/Parser/Sources/Parser/OPMLParser/OPMLAttributes.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLAttributes.swift new file mode 100644 index 000000000..508dd339a --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLAttributes.swift @@ -0,0 +1,53 @@ +// +// OPMLAttributes.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +// OPML allows for arbitrary attributes. +// These are the common attributes in OPML files used as RSS subscription lists. 
+ +private let opmlTextKey = "text" +private let opmlTitleKey = "title" +private let opmlDescriptionKey = "description" +private let opmlTypeKey = "type" +private let opmlVersionKey = "version" +private let opmlHTMLURLKey = "htmlUrl" +private let opmlXMLURLKey = "xmlUrl" + +// A frequent error in OPML files is to mess up the capitalization, +// so these do a case-insensitive lookup. + +extension Dictionary where Key == String, Value == String { + + var opml_text: String? { + object(forCaseInsensitiveKey: opmlTextKey) + } + + var opml_title: String? { + object(forCaseInsensitiveKey: opmlTitleKey) + } + + var opml_description: String? { + object(forCaseInsensitiveKey: opmlDescriptionKey) + } + + var opml_type: String? { + object(forCaseInsensitiveKey: opmlTypeKey) + } + + var opml_version: String? { + object(forCaseInsensitiveKey: opmlVersionKey) + } + + var opml_htmlUrl: String? { + object(forCaseInsensitiveKey: opmlHTMLURLKey) + } + + var opml_xmlUrl: String? { + object(forCaseInsensitiveKey: opmlXMLURLKey) + } +} diff --git a/Modules/Parser/Sources/Parser/OPMLParser/OPMLDocument.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLDocument.swift new file mode 100644 index 000000000..020ad11d3 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLDocument.swift @@ -0,0 +1,19 @@ +// +// OPMLDocument.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public final class OPMLDocument: OPMLItem { + + public var title: String? = nil + public var url: String? = nil + + init(url: String?) { + self.url = url + super.init(attributes: nil) + } +} diff --git a/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift new file mode 100644 index 000000000..a0cd12df6 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift @@ -0,0 +1,40 @@ +// +// OPMLFeedSpecifier.swift +// +// +// Created by Brent Simmons on 8/18/24. 
+// + +import Foundation + +public struct OPMLFeedSpecifier: Sendable { + + public let title: String? + public let feedDescription: String? + public let homePageURL: String? + public let feedURL: String + + init(title: String?, feedDescription: String?, homePageURL: String?, feedURL: String) { + + if String.isEmptyOrNil(title) { + self.title = nil + } else { + self.title = title + } + + if String.isEmptyOrNil(feedDescription) { + self.feedDescription = nil + } else { + self.feedDescription = feedDescription + } + + if String.isEmptyOrNil(homePageURL) { + self.homePageURL = nil + } else { + self.homePageURL = homePageURL + } + + self.feedURL = feedURL + } +} + diff --git a/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift new file mode 100644 index 000000000..57e4232dc --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift @@ -0,0 +1,42 @@ +// +// OPMLItem.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation +import os + +public class OPMLItem { + + public let feedSpecifier: OPMLFeedSpecifier? + + public let attributes: [String: String]? + public let titleFromAttributes: String? + + public var items: [OPMLItem]? + public var isFolder: Bool { + (items?.count ?? 0) > 0 + } + + init(attributes: [String : String]?) { + + self.titleFromAttributes = attributes?.opml_title ?? 
attributes?.opml_text + self.attributes = attributes + + if let feedURL = attributes?.opml_xmlUrl { + self.feedSpecifier = OPMLFeedSpecifier(title: self.titleFromAttributes, feedDescription: attributes?.opml_description, homePageURL: attributes?.opml_htmlUrl, feedURL: feedURL) + } else { + self.feedSpecifier = nil + } + } + + public func add(_ item: OPMLItem) { + + if items == nil { + items = [OPMLItem]() + } + items?.append(item) + } +} diff --git a/Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift new file mode 100644 index 000000000..4ec38d5b1 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift @@ -0,0 +1,117 @@ +// +// OPMLParser.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public final class OPMLParser { + + private let parserData: ParserData + private var data: Data { + parserData.data + } + + private var opmlDocument: OPMLDocument? + + private var itemStack = [OPMLItem]() + private var currentItem: OPMLItem? { + itemStack.last + } + + /// Returns nil if data can’t be parsed (if it’s not OPML). + public static func document(with parserData: ParserData) -> OPMLDocument? { + + let opmlParser = OPMLParser(parserData) + opmlParser.parse() + return opmlParser.opmlDocument + } + + init(_ parserData: ParserData) { + self.parserData = parserData + } +} + +private extension OPMLParser { + + func parse() { + + guard canParseData() else { + return + } + + opmlDocument = OPMLDocument(url: parserData.url) + push(opmlDocument!) 
+ + let saxParser = SAXParser(delegate: self, data: data) + saxParser.parse() + } + + func canParseData() -> Bool { + + data.containsASCIIString(" 0 else { + assertionFailure("itemStack.count must be > 0") + return + } + + itemStack.removeLast() + } +} + +extension OPMLParser: SAXParserDelegate { + + private struct XMLName { + static let title = "title".utf8CString + static let outline = "outline".utf8CString + } + + public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { + + if SAXEqualTags(localName, XMLName.title) { + saxParser.beginStoringCharacters() + return + } + + if !SAXEqualTags(localName, XMLName.outline) { + return + } + + let attributesDictionary = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) + let item = OPMLItem(attributes: attributesDictionary) + + currentItem?.add(item) + push(item) + } + + public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + + if SAXEqualTags(localName, XMLName.title) { + if let item = currentItem as? OPMLDocument { + item.title = saxParser.currentStringWithTrimmedWhitespace + } + saxParser.endStoringCharacters() + return + } + + if SAXEqualTags(localName, XMLName.outline) { + popItem() + } + } + + public func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { + + // Nothing to do, but method is required. + } +} diff --git a/Modules/Parser/Sources/Parser/ParserData+Parser.swift b/Modules/Parser/Sources/Parser/ParserData+Parser.swift deleted file mode 100644 index 1563bafd9..000000000 --- a/Modules/Parser/Sources/Parser/ParserData+Parser.swift +++ /dev/null @@ -1,11 +0,0 @@ -// -// File.swift -// -// -// Created by Brent Simmons on 4/7/24. 
-// - -import Foundation -import ParserObjC - -extension ParserData: @unchecked Sendable {} diff --git a/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift b/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift deleted file mode 100644 index de80eb2ce..000000000 --- a/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift +++ /dev/null @@ -1,11 +0,0 @@ -// -// File.swift -// -// -// Created by Brent Simmons on 4/7/24. -// - -import Foundation -import ParserObjC - -extension RSHTMLMetadataParser: @unchecked Sendable {} diff --git a/Modules/Parser/Sources/Parser/SAX/Extensions/Data+SAX.swift b/Modules/Parser/Sources/Parser/SAX/Extensions/Data+SAX.swift new file mode 100644 index 000000000..036ef3cd7 --- /dev/null +++ b/Modules/Parser/Sources/Parser/SAX/Extensions/Data+SAX.swift @@ -0,0 +1,68 @@ +// +// Data+Parser.swift +// +// +// Created by Brent Simmons on 8/24/24. +// + +import Foundation + +public extension Data { + + /// Return true if the data contains a given String. + /// + /// Assumes that the data is UTF-8 or similar encoding — + /// if it’s UTF-16 or UTF-32, for instance, this will always return false. + /// Luckily these are rare. + /// + /// The String to search for should be something that could be encoded + /// in ASCII — like " Bool { + + contains(searchFor.utf8) + } + + /// Return true if searchFor appears in self. + func contains(_ searchFor: Data) -> Bool { + + let searchForCount = searchFor.count + let dataCount = self.count + + guard searchForCount > 0, searchForCount <= dataCount else { + return false + } + + let searchForInitialByte = searchFor[0] + var found = false + + self.withUnsafeBytes { bytes in + + let buffer = bytes.bindMemory(to: UInt8.self) + + for i in 0...dataCount - searchForCount { + + if buffer[i] == searchForInitialByte { + + var match = true + + for j in 1.. String? 
{ + + if let object = self[key] { + return object + } + + let lowercaseKey = key.lowercased() + + for (oneKey, oneValue) in self { + if lowercaseKey.caseInsensitiveCompare(oneKey) == .orderedSame { + return oneValue + } + } + + return nil + } +} diff --git a/Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift b/Modules/Parser/Sources/Parser/SAX/Extensions/String+SAX.swift similarity index 70% rename from Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift rename to Modules/Parser/Sources/Parser/SAX/Extensions/String+SAX.swift index 9922be93a..8a5882498 100644 --- a/Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift +++ b/Modules/Parser/Sources/Parser/SAX/Extensions/String+SAX.swift @@ -8,10 +8,16 @@ import Foundation -extension String { +public extension String { var nilIfEmptyOrWhitespace: String? { return self.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : self } + static func isEmptyOrNil(_ s: String?) -> Bool { + if let s { + return s.isEmpty + } + return true + } } diff --git a/Modules/Parser/Sources/Parser/SAX/ParserData.swift b/Modules/Parser/Sources/Parser/SAX/ParserData.swift new file mode 100644 index 000000000..67fc71a3f --- /dev/null +++ b/Modules/Parser/Sources/Parser/SAX/ParserData.swift @@ -0,0 +1,19 @@ +// +// ParserData.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public struct ParserData: Sendable { + + public let url: String + public let data: Data + + public init(url: String, data: Data) { + self.url = url + self.data = data + } +} diff --git a/Modules/Parser/Sources/Parser/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/Parser/SAX/SAXHTMLParser.swift new file mode 100644 index 000000000..7d5c2ed77 --- /dev/null +++ b/Modules/Parser/Sources/Parser/SAX/SAXHTMLParser.swift @@ -0,0 +1,200 @@ +// +// SAXHTMLParser.swift +// +// +// Created by Brent Simmons on 8/26/24. 
+// + +import Foundation +import FoundationExtras +import libxml2 + +public protocol SAXHTMLParserDelegate: AnyObject { + + func saxHTMLParser(_: SAXHTMLParser, startElement: XMLPointer, attributes: UnsafePointer?) + + func saxHTMLParser(_: SAXHTMLParser, endElement: XMLPointer) + + // Length is guaranteed to be greater than 0. + func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) +} + +public final class SAXHTMLParser { + + fileprivate let delegate: SAXHTMLParserDelegate + + public var currentCharacters: Data? { // UTF-8 encoded + + guard storingCharacters else { + return nil + } + return characters + } + + // Conveniences to get string version of currentCharacters + + public var currentString: String? { + + guard let d = currentCharacters, !d.isEmpty else { + return nil + } + return String(data: d, encoding: .utf8) + } + + public var currentStringWithTrimmedWhitespace: String? { + + guard let s = currentString else { + return nil + } + return s.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines) + } + + private var data: Data + private var storingCharacters = false + private var characters = Data() + + public init(delegate: SAXHTMLParserDelegate, data: Data) { + + self.delegate = delegate + self.data = data + } + + public func parse() { + + guard !data.isEmpty else { + return + } + + data.withUnsafeBytes { bufferPointer in + + guard let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress else { + return + } + + let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count)) + let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding) + htmlCtxtUseOptions(context, Int32(HTML_PARSE_RECOVER.rawValue | HTML_PARSE_NONET.rawValue | HTML_PARSE_COMPACT.rawValue | HTML_PARSE_NOERROR.rawValue | HTML_PARSE_NOWARNING.rawValue)) + + htmlParseChunk(context, bytes, Int32(data.count), 0) + + htmlParseChunk(context, nil, 0, 1) + htmlFreeParserCtxt(context) 
+ } + } + + /// Delegate can call from its startElement callback. Characters will be available in the endElement callback as the currentCharacters property. Storing characters is stopped after each endElement callback. + public func beginStoringCharacters() { + + storingCharacters = true + characters.count = 0 + } + + /// Stops storing characters and empties the character buffer. + public func endStoringCharacters() { + + storingCharacters = false + characters.count = 0 + } + + /// Converts the attribute array passed to the startElement callback into a dictionary. + /// + /// Walks `attributes` as alternating name and value pointers and stops at the first nil found where a name is expected. A nil value pointer is stored as an empty string. + /// + /// - Returns: nil if `attributes` is nil; otherwise the dictionary, which may be empty. + public func attributesDictionary(_ attributes: UnsafePointer?) -> StringDictionary? { + + guard let attributes else { + return nil + } + + var dictionary = [String: String]() + var ix = 0 + var currentKey: String? = nil + + while true { + let oneAttribute = attributes[ix] + ix += 1 + + // A nil where a name is expected marks the end of the array. + if currentKey == nil && oneAttribute == nil { + break + } + + if currentKey == nil { + if let oneAttribute { + currentKey = String(cString: oneAttribute) + } + } else { + let value: String? + if let oneAttribute { + value = String(cString: oneAttribute) + } else { + value = nil + } + + // currentKey is non-nil here: this is the else branch of the currentKey == nil check. + dictionary[currentKey!] = value ?? "" + currentKey = nil + } + } + + return dictionary + } +} + +private extension SAXHTMLParser { + + func charactersFound(_ htmlCharacters: XMLPointer, count: Int) { + + if storingCharacters { + characters.append(htmlCharacters, count: count) + } + + delegate.saxHTMLParser(self, charactersFound: htmlCharacters, count: count) + } + + func startElement(_ name: XMLPointer, attributes: UnsafePointer?) 
{ + + delegate.saxHTMLParser(self, startElement: name, attributes: attributes) + } + + func endElement(_ name: XMLPointer) { + + delegate.saxHTMLParser(self, endElement: name) + endStoringCharacters() + } +} + +private func parser(from context: UnsafeMutableRawPointer) -> SAXHTMLParser { + + Unmanaged.fromOpaque(context).takeUnretainedValue() +} + +nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = { + + var handler = htmlSAXHandler() + + handler.characters = { (context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) in + + guard let context, let ch, len > 0 else { + return + } + + let parser = parser(from: context) + parser.charactersFound(ch, count: Int(len)) + } + + handler.startElement = { (context: UnsafeMutableRawPointer?, name: XMLPointer?, attributes: UnsafeMutablePointer?) in + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.startElement(name, attributes: attributes) + } + + handler.endElement = { (context: UnsafeMutableRawPointer?, name: XMLPointer?) in + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.endElement(name) + } + + return handler +}() diff --git a/Modules/Parser/Sources/Parser/SAX/SAXParser.swift b/Modules/Parser/Sources/Parser/SAX/SAXParser.swift new file mode 100644 index 000000000..ace5a04a6 --- /dev/null +++ b/Modules/Parser/Sources/Parser/SAX/SAXParser.swift @@ -0,0 +1,204 @@ +// +// SAXParser.swift. +// +// +// Created by Brent Simmons on 8/12/24. +// + +import Foundation +import FoundationExtras +import libxml2 + +public typealias XMLPointer = UnsafePointer + +public protocol SAXParserDelegate { + + func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) 
+ + func saxParser(_: SAXParser, xmlEndElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) + + func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) +} + +public final class SAXParser { + + fileprivate let delegate: SAXParserDelegate + + public var currentCharacters: Data? { // UTF-8 encoded + + guard storingCharacters else { + return nil + } + return characters + } + + // Conveniences to get string version of currentCharacters + + public var currentString: String? { + + guard let d = currentCharacters, !d.isEmpty else { + return nil + } + return String(data: d, encoding: .utf8) + } + + public var currentStringWithTrimmedWhitespace: String? { + + guard let s = currentString else { + return nil + } + return s.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines) + } + + private var data: Data + private var storingCharacters = false + private var characters = Data() + + public init(delegate: SAXParserDelegate, data: Data) { + + self.delegate = delegate + self.data = data + } + + public func parse() { + + guard !data.isEmpty else { + return + } + + let context = xmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil) + xmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NOENT.rawValue)) + + data.withUnsafeBytes { bufferPointer in + if let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress { + xmlParseChunk(context, bytes, Int32(data.count), 0) + } + } + + xmlParseChunk(context, nil, 0, 1) + xmlFreeParserCtxt(context) + } + + /// Delegate can call from xmlStartElement. Characters will be available in xmlEndElement as currentCharacters property. Storing characters is stopped after each xmlEndElement. 
+ public func beginStoringCharacters() { + + storingCharacters = true + characters.count = 0 + } + + /// Stops storing characters and empties the character buffer. + public func endStoringCharacters() { + + storingCharacters = false + characters.count = 0 + } + + /// Builds a name-to-value dictionary from the attribute array handed to the start-element callback. + /// + /// Each attribute occupies `fieldCount` (5) consecutive pointers. Slot 0 is read as the name, slot 1 as the optional prefix, and slots 3 and 4 as the start and end of the value bytes. A name with a prefix is stored as "prefix:name". + /// + /// - Returns: nil when `attributes` is nil or `attributeCount` is not positive. Attributes with a nil name or value pointer, or a value that is not valid UTF-8, are skipped. + public func attributesDictionary(_ attributes: UnsafePointer?, attributeCount: Int) -> StringDictionary? { + + guard attributeCount > 0, let attributes else { + return nil + } + + var dictionary = [String: String]() + + let fieldCount = 5 + var i = 0, j = 0 + while i < attributeCount { + + // Advance both cursors on every exit from the loop body, including the `continue` paths below. Otherwise a nil name or value pointer would leave `i` and `j` unchanged and the loop would never terminate. + defer { + i += 1 + j += fieldCount + } + + guard let attribute = attributes[j] else { + continue + } + let prefix = attributes[j + 1] + var attributeName = String(cString: attribute) + if let prefix { + let attributePrefix = String(cString: prefix) + attributeName = "\(attributePrefix):\(attributeName)" + } + + guard let valueStart = attributes[j + 3], let valueEnd = attributes[j + 4] else { + continue + } + let valueCount = valueEnd - valueStart + let value = String(bytes: UnsafeRawBufferPointer(start: valueStart, count: Int(valueCount)), encoding: .utf8) + + if let value { + dictionary[attributeName] = value + } + } + + return dictionary + } +} + +private extension SAXParser { + + // Appends to the character buffer while storing is on, then forwards the characters to the delegate. + func charactersFound(_ xmlCharacters: XMLPointer, count: Int) { + + if storingCharacters { + characters.append(xmlCharacters, count: count) + } + + delegate.saxParser(self, xmlCharactersFound: xmlCharacters, count: count) + } + + // Forwards the start-element event to the delegate unchanged. + func startElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { + + delegate.saxParser(self, xmlStartElement: name, prefix: prefix, uri: uri, namespaceCount: namespaceCount, namespaces: namespaces, attributeCount: attributeCount, attributesDefaultedCount: attributesDefaultedCount, attributes: attributes) + } + + func endElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) 
{ + + delegate.saxParser(self, xmlEndElement: name, prefix: prefix, uri: uri) + endStoringCharacters() + } +} + +private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer?) { + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.startElement(name, prefix: prefix, uri: URI, namespaceCount: Int(nb_namespaces), namespaces: namespaces, attributeCount: Int(nb_attributes), attributesDefaultedCount: Int(nb_defaulted), attributes: attributes) +} + +private func endElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?) { + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.endElement(name, prefix: prefix, uri: URI) +} + +private func charactersFound(_ context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) { + + guard let context, let ch, len > 0 else { + return + } + + let parser = parser(from: context) + parser.charactersFound(ch, count: Int(len)) +} + +private func parser(from context: UnsafeMutableRawPointer) -> SAXParser { + + Unmanaged.fromOpaque(context).takeUnretainedValue() +} + +nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = { + + var handler = xmlSAXHandler() + + handler.characters = charactersFound + handler.startElementNs = startElement + handler.endElementNs = endElement + handler.initialized = XML_SAX2_MAGIC + + return handler +}() + diff --git a/Modules/Parser/Sources/Parser/SAX/SAXUtilities.swift b/Modules/Parser/Sources/Parser/SAX/SAXUtilities.swift new file mode 100644 index 000000000..3bb680e89 --- /dev/null +++ b/Modules/Parser/Sources/Parser/SAX/SAXUtilities.swift @@ -0,0 +1,41 @@ +// +// SAXUtilities.swift +// +// +// Created by Brent Simmons on 8/26/24. 
+// + +import Foundation +import libxml2 + +public func SAXEqualTags(_ localName: XMLPointer, _ tag: ContiguousArray) -> Bool { + + return tag.withUnsafeBufferPointer { bufferPointer in + + let tagCount = tag.count // includes 0 terminator + + for i in 0.. Date { + var dateComponents = DateComponents() + dateComponents.calendar = Calendar.current + dateComponents.timeZone = TimeZone(secondsFromGMT: 0) + + dateComponents.year = year + dateComponents.month = month + dateComponents.day = day + dateComponents.hour = hour + dateComponents.minute = minute + dateComponents.second = second + dateComponents.nanosecond = millisecond * 1000000 + + return dateComponents.date! + } + + func testDateWithString() { + var expectedDateResult = dateWithValues(2010, 5, 28, 21, 3, 38) + + var d = date("Fri, 28 May 2010 21:03:38 +0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 +00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 -00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 -0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 GMT") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38+00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38+0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38-0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38-00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38Z") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 7, 13, 17, 6, 40) + d = date("2010-07-13T17:06:40+00:00") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 4, 30, 12, 0, 0) + d = date("30 Apr 2010 5:00 PDT") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 5, 21, 21, 22, 53) + d = date("21 May 2010 
21:22:53 GMT") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 6, 9, 5, 0, 0) + d = date("Wed, 09 Jun 2010 00:00 EST") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 6, 23, 3, 43, 50) + d = date("Wed, 23 Jun 2010 03:43:50 Z") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 6, 22, 3, 57, 49) + d = date("2010-06-22T03:57:49+00:00") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 11, 17, 13, 40, 07) + d = date("2010-11-17T08:40:07-05:00") + XCTAssertEqual(d, expectedDateResult) + } + + func testAtomDateWithMissingTCharacter() { + let expectedDateResult = dateWithValues(2010, 11, 17, 13, 40, 07) + let d = date("2010-11-17 08:40:07-05:00") + XCTAssertEqual(d, expectedDateResult) + } + + func testFeedbinDate() { + let expectedDateResult = dateWithValues(2019, 9, 27, 21, 01, 48) + let d = date("2019-09-27T21:01:48.000000Z") + XCTAssertEqual(d, expectedDateResult) + } + + func testMillisecondDate() { + let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56, 516) + let d = date("2021-03-29T10:46:56.516+00:00") + XCTAssertEqual(d, expectedDateResult) + } + + func testExtraMillisecondPrecisionDate() { + let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56, 516) + let d = date("2021-03-29T10:46:56.516941+00:00") + XCTAssertEqual(d, expectedDateResult) + } + + func testW3CParsingPerformance() { + + // 0.0001 seconds on my Mac Studio M1 + self.measure { + _ = date("2021-03-29T10:46:56.516941+00:00") + } + } + + func testPubDateParsingPerformance() { + + // 0.0001 seconds on my Mac Studio M1 + self.measure { + _ = date("21 May 2010 21:22:53 GMT") + } + } +} + +private extension DateParserTests { + + func date(_ string: String) -> Date? 
{ + let d = Data(string.utf8) + return DateParser.date(data: d) + } +} diff --git a/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift index b72c26b34..db8112cf3 100644 --- a/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift +++ b/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift @@ -9,39 +9,70 @@ import XCTest import Parser -class EntityDecodingTests: XCTestCase { +final class EntityDecodingTests: XCTestCase { func test39Decoding() { // Bug found by Manton Reece — the ' entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog. let s = "These are the times that try men's souls." - let decoded = s.rsparser_stringByDecodingHTMLEntities() + let decoded = HTMLEntityDecoder.decodedString(s) XCTAssertEqual(decoded, "These are the times that try men's souls.") } - func testEntities() { + func testEntityAtBeginning() { + + let s = "'leading single quote" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "'leading single quote") + } + + func testEntityAtEnd() { + + let s = "trailing single quote'" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "trailing single quote'") + } + + func testEntityInMiddle() { + + let s = "entity ç in middle" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "entity ç in middle") + } + + func testMultipleEntitiesInARow() { + + let s = "çèmult……iple 'æ"entities÷♥" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "çèmult……iple 'æ\"entities÷♥") + } + + func testOnlyEntity() { var s = "…" - var decoded = s.rsparser_stringByDecodingHTMLEntities() + var decoded = HTMLEntityDecoder.decodedString(s) XCTAssertEqual(decoded, "…") s = "…" - decoded = s.rsparser_stringByDecodingHTMLEntities() + decoded = HTMLEntityDecoder.decodedString(s) XCTAssertEqual(decoded, "…") s = "'" - decoded = s.rsparser_stringByDecodingHTMLEntities() + 
decoded = HTMLEntityDecoder.decodedString(s) XCTAssertEqual(decoded, "'") s = "§" - decoded = s.rsparser_stringByDecodingHTMLEntities() + decoded = HTMLEntityDecoder.decodedString(s) XCTAssertEqual(decoded, "§") s = "£" - decoded = s.rsparser_stringByDecodingHTMLEntities() + decoded = HTMLEntityDecoder.decodedString(s) XCTAssertEqual(decoded, "£") - } } diff --git a/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift index 931db3fe2..3649e5acd 100644 --- a/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift @@ -7,38 +7,37 @@ // import XCTest -import Parser -import ParserObjC +@testable import Parser -class FeedParserTypeTests: XCTestCase { +final class FeedParserTypeTests: XCTestCase { // MARK: HTML func testDaringFireballHTMLType() { let d = parserData("DaringFireball", "html", "http://daringfireball.net/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .notAFeed) } func testFurboHTMLType() { let d = parserData("furbo", "html", "http://furbo.org/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .notAFeed) } func testInessentialHTMLType() { let d = parserData("inessential", "html", "http://inessential.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .notAFeed) } func testSixColorsHTMLType() { let d = parserData("sixcolors", "html", "https://sixcolors.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .notAFeed) } @@ -47,68 +46,68 @@ class FeedParserTypeTests: XCTestCase { func testEMarleyRSSType() { let d = parserData("EMarley", "rss", "https://medium.com/@emarley") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testScriptingNewsRSSType() { let d = parserData("scriptingNews", "rss", "http://scripting.com/") - let 
type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testKatieFloydRSSType() { let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testMantonRSSType() { let d = parserData("manton", "rss", "http://manton.org/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testDCRainmakerRSSType() { let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testMacworldRSSType() { let d = parserData("macworld", "rss", "https://www.macworld.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testNatashaTheRobotRSSType() { let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testDontHitSaveRSSWithBOMType() { let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testBioRDF() { let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } func testPHPXML() { let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rss) } @@ -118,20 +117,20 @@ class FeedParserTypeTests: XCTestCase { // File extension is .rss, but it’s really an Atom feed. 
let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .atom) } func testOneFootTsunamiAtomType() { let d = parserData("OneFootTsunami", "atom", "http://onefoottsunami.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .atom) } func testRussCoxAtomType() { let d = parserData("russcox", "atom", "https://research.swtch.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .atom) } @@ -140,7 +139,7 @@ class FeedParserTypeTests: XCTestCase { func testScriptingNewsJSONType() { let d = parserData("ScriptingNews", "json", "http://scripting.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .rssInJSON) } @@ -149,35 +148,35 @@ class FeedParserTypeTests: XCTestCase { func testInessentialJSONFeedType() { let d = parserData("inessential", "json", "http://inessential.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .jsonFeed) } func testAllThisJSONFeedType() { let d = parserData("allthis", "json", "http://leancrew.com/allthis/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .jsonFeed) } func testCurtJSONFeedType() { let d = parserData("curt", "json", "http://curtclifton.net/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .jsonFeed) } func testPixelEnvyJSONFeedType() { let d = parserData("pxlnv", "json", "http://pxlnv.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .jsonFeed) } func testRoseJSONFeedType() { let d = parserData("rose", "json", "https://www.rosemaryorchard.com/") - let type = feedType(d) + let type = FeedType.feedType(d.data) XCTAssertTrue(type == .jsonFeed) } @@ -189,7 +188,7 @@ class FeedParserTypeTests: XCTestCase { // The type detector should return .unknown rather than 
.notAFeed. let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/") - let type = feedType(d, isPartialData: true) + let type = FeedType.feedType(d.data, isPartialData: true) XCTAssertEqual(type, .unknown) } @@ -201,7 +200,7 @@ class FeedParserTypeTests: XCTestCase { let d = parserData("EMarley", "rss", "https://medium.com/@emarley") self.measure { - let _ = feedType(d) + let _ = FeedType.feedType(d.data) } } @@ -211,7 +210,7 @@ class FeedParserTypeTests: XCTestCase { let d = parserData("inessential", "json", "http://inessential.com/") self.measure { - let _ = feedType(d) + let _ = FeedType.feedType(d.data) } } @@ -221,7 +220,7 @@ class FeedParserTypeTests: XCTestCase { let d = parserData("DaringFireball", "html", "http://daringfireball.net/") self.measure { - let _ = feedType(d) + let _ = FeedType.feedType(d.data) } } @@ -231,15 +230,7 @@ class FeedParserTypeTests: XCTestCase { let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") self.measure { - let _ = feedType(d) + let _ = FeedType.feedType(d.data) } } - -} - -func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { - let filename = "Resources/\(filename)" - let path = Bundle.module.path(forResource: filename, ofType: fileExtension)! - let data = try! 
Data(contentsOf: URL(fileURLWithPath: path)) - return ParserData(url: url, data: data) } diff --git a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift index 157445587..7e7c69acc 100644 --- a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift @@ -8,23 +8,22 @@ import XCTest import Parser -import ParserObjC -class HTMLLinkTests: XCTestCase { +final class HTMLLinkTests: XCTestCase { func testSixColorsPerformance() { // 0.003 sec on my 2012 iMac let d = parserData("sixcolors", "html", "http://sixcolors.com/") self.measure { - let _ = RSHTMLLinkParser.htmlLinks(with: d) + let _ = HTMLLinkParser.htmlLinks(with: d) } } func testSixColorsLink() { let d = parserData("sixcolors", "html", "http://sixcolors.com/") - let links = RSHTMLLinkParser.htmlLinks(with: d) + let links = HTMLLinkParser.htmlLinks(with: d) let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php" let textToFind = "this week’s episode of The Incomparable" @@ -39,5 +38,4 @@ class HTMLLinkTests: XCTestCase { XCTAssertTrue(found) XCTAssertEqual(links.count, 131) } - } diff --git a/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift index fed009ee6..6adc5de7f 100644 --- a/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift @@ -8,20 +8,19 @@ import XCTest import Parser -import ParserObjC -class HTMLMetadataTests: XCTestCase { +final class HTMLMetadataTests: XCTestCase { func testDaringFireball() { let d = parserData("DaringFireball", "html", "http://daringfireball.net/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) + let metadata = HTMLMetadataParser.metadata(with: d) - XCTAssertEqual(metadata.favicons.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005") + XCTAssertEqual(metadata.favicons?.first?.urlString, 
"http://daringfireball.net/graphics/favicon.ico?v=005") - XCTAssertEqual(metadata.feedLinks.count, 1) + XCTAssertEqual(metadata.feedLinks?.count, 1) - let feedLink = metadata.feedLinks.first! + let feedLink: HTMLMetadataFeedLink = (metadata.feedLinks?.first!)! XCTAssertNil(feedLink.title) XCTAssertEqual(feedLink.type, "application/atom+xml") XCTAssertEqual(feedLink.urlString, "http://daringfireball.net/feeds/main") @@ -32,20 +31,20 @@ class HTMLMetadataTests: XCTestCase { // 0.002 sec on my 2012 iMac let d = parserData("DaringFireball", "html", "http://daringfireball.net/") self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) + let _ = HTMLMetadataParser.metadata(with: d) } } func testFurbo() { let d = parserData("furbo", "html", "http://furbo.org/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) + let metadata = HTMLMetadataParser.metadata(with: d) - XCTAssertEqual(metadata.favicons.first?.urlString, "http://furbo.org/favicon.ico") + XCTAssertEqual(metadata.favicons?.first?.urlString, "http://furbo.org/favicon.ico") - XCTAssertEqual(metadata.feedLinks.count, 1) + XCTAssertEqual(metadata.feedLinks?.count, 1) - let feedLink = metadata.feedLinks.first! + let feedLink = (metadata.feedLinks?.first!)! 
XCTAssertEqual(feedLink.title, "Iconfactory News Feed") XCTAssertEqual(feedLink.type, "application/rss+xml") } @@ -55,24 +54,24 @@ class HTMLMetadataTests: XCTestCase { // 0.001 sec on my 2012 iMac let d = parserData("furbo", "html", "http://furbo.org/") self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) + let _ = HTMLMetadataParser.metadata(with: d) } } func testInessential() { let d = parserData("inessential", "html", "http://inessential.com/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) + let metadata = HTMLMetadataParser.metadata(with: d) - XCTAssertNil(metadata.favicons.first?.urlString) + XCTAssertNil(metadata.favicons?.first?.urlString) - XCTAssertEqual(metadata.feedLinks.count, 1) - let feedLink = metadata.feedLinks.first! + XCTAssertEqual(metadata.feedLinks?.count, 1) + let feedLink = (metadata.feedLinks?.first!)! XCTAssertEqual(feedLink.title, "RSS") XCTAssertEqual(feedLink.type, "application/rss+xml") XCTAssertEqual(feedLink.urlString, "http://inessential.com/xml/rss.xml") - XCTAssertEqual(metadata.appleTouchIcons.count, 0); + XCTAssertEqual(metadata.appleTouchIcons?.count ?? 
0, 0); } func testInessentialPerformance() { @@ -80,7 +79,7 @@ class HTMLMetadataTests: XCTestCase { // 0.001 sec on my 2012 iMac let d = parserData("inessential", "html", "http://inessential.com/") self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) + let _ = HTMLMetadataParser.metadata(with: d) } } @@ -89,25 +88,25 @@ class HTMLMetadataTests: XCTestCase { // 0.004 sec on my 2012 iMac let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) + let _ = HTMLMetadataParser.metadata(with: d) } } func testSixColors() { let d = parserData("sixcolors", "html", "http://sixcolors.com/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) + let metadata = HTMLMetadataParser.metadata(with: d) - XCTAssertEqual(metadata.favicons.first?.urlString, "https://sixcolors.com/images/favicon.ico") + XCTAssertEqual(metadata.favicons?.first?.urlString, "https://sixcolors.com/images/favicon.ico") - XCTAssertEqual(metadata.feedLinks.count, 1); - let feedLink = metadata.feedLinks.first! + XCTAssertEqual(metadata.feedLinks?.count, 1); + let feedLink = (metadata.feedLinks?.first!)! 
XCTAssertEqual(feedLink.title, "RSS"); XCTAssertEqual(feedLink.type, "application/rss+xml"); XCTAssertEqual(feedLink.urlString, "http://feedpress.me/sixcolors"); - XCTAssertEqual(metadata.appleTouchIcons.count, 6); - let icon = metadata.appleTouchIcons[3]; + XCTAssertEqual(metadata.appleTouchIcons!.count, 6); + let icon = metadata.appleTouchIcons![3]; XCTAssertEqual(icon.rel, "apple-touch-icon"); XCTAssertEqual(icon.sizes, "120x120"); XCTAssertEqual(icon.urlString, "https://sixcolors.com/apple-touch-icon-120.png"); @@ -118,24 +117,24 @@ class HTMLMetadataTests: XCTestCase { // 0.002 sec on my 2012 iMac let d = parserData("sixcolors", "html", "http://sixcolors.com/") self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) + let _ = HTMLMetadataParser.metadata(with: d) } } func testCocoOGImage() { let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - let openGraphData = metadata.openGraphProperties - let image = openGraphData.images.first! + let metadata = HTMLMetadataParser.metadata(with: d) + let openGraphData = metadata.openGraphProperties! + let image = openGraphData.image! XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") } func testCocoTwitterImage() { let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - let twitterData = metadata.twitterProperties + let metadata = HTMLMetadataParser.metadata(with: d) + let twitterData = metadata.twitterProperties! let imageURL = twitterData.imageURL! 
XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") } @@ -143,10 +142,10 @@ class HTMLMetadataTests: XCTestCase { func testYouTube() { // YouTube is a special case — the feed links appear after the head section, in the body section. let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) + let metadata = HTMLMetadataParser.metadata(with: d) - XCTAssertEqual(metadata.feedLinks.count, 1); - let feedLink = metadata.feedLinks.first! + XCTAssertEqual(metadata.feedLinks!.count, 1); + let feedLink = metadata.feedLinks!.first! XCTAssertEqual(feedLink.title, "RSS"); XCTAssertEqual(feedLink.type, "application/rss+xml"); XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ"); diff --git a/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift b/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift index fc3af8e5e..f96dde79e 100644 --- a/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift @@ -9,14 +9,14 @@ import XCTest import Parser -class JSONFeedParserTests: XCTestCase { +final class JSONFeedParserTests: XCTestCase { func testInessentialPerformance() { // 0.001 sec on my 2012 iMac. let d = parserData("inessential", "json", "http://inessential.com/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } @@ -25,14 +25,14 @@ class JSONFeedParserTests: XCTestCase { // 0.009 sec on my 2012 iMac. let d = parserData("DaringFireball", "json", "http://daringfireball.net/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! 
FeedParser.parse(d) } } func testGettingFaviconAndIconURLs() async { let d = parserData("DaringFireball", "json", "http://daringfireball.net/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssert(parsedFeed.faviconURL == "https://daringfireball.net/graphics/favicon-64.png") XCTAssert(parsedFeed.iconURL == "https://daringfireball.net/graphics/apple-touch-icon.png") @@ -41,7 +41,7 @@ class JSONFeedParserTests: XCTestCase { func testAllThis() async { let d = parserData("allthis", "json", "http://leancrew.com/allthis/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.items.count, 12) } @@ -49,7 +49,7 @@ class JSONFeedParserTests: XCTestCase { func testCurt() async { let d = parserData("curt", "json", "http://curtclifton.net/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.items.count, 26) @@ -67,20 +67,20 @@ class JSONFeedParserTests: XCTestCase { func testPixelEnvy() async { let d = parserData("pxlnv", "json", "http://pxlnv.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.items.count, 20) } func testRose() async { let d = parserData("rose", "json", "http://www.rosemaryorchard.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.items.count, 84) } func test3960() async { let d = parserData("3960", "json", "http://journal.3960.org/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.items.count, 20) XCTAssertEqual(parsedFeed.language, "de-DE") @@ -91,7 +91,7 @@ class JSONFeedParserTests: XCTestCase { func testAuthors() async { let d = parserData("authors", "json", "https://example.com/") - let parsedFeed = try! await FeedParser.parse(d)! 
+ let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.items.count, 4) let rootAuthors = Set([ diff --git a/Modules/Parser/Tests/ParserTests/OPMLTests.swift b/Modules/Parser/Tests/ParserTests/OPMLTests.swift index 3beaf0421..563cd149e 100644 --- a/Modules/Parser/Tests/ParserTests/OPMLTests.swift +++ b/Modules/Parser/Tests/ParserTests/OPMLTests.swift @@ -7,32 +7,33 @@ // import XCTest -import Parser -import ParserObjC +@testable import Parser -class OPMLTests: XCTestCase { +final class OPMLTests: XCTestCase { let subsData = parserData("Subs", "opml", "http://example.org/") func testOPMLParsingPerformance() { - // 0.002 sec on my 2012 iMac. + // 0.003 sec on my M1 Mac Studio 2022 self.measure { - let _ = try! RSOPMLParser.parseOPML(with: self.subsData) + let _ = OPMLParser.document(with: self.subsData) } } func testNotOPML() { let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") - XCTAssertThrowsError(try RSOPMLParser.parseOPML(with: d)) + XCTAssertNil(OPMLParser.document(with: d)) } func testSubsStructure() { - let opmlDocument = try! RSOPMLParser.parseOPML(with: subsData) - XCTAssertEqual("Subs", opmlDocument.title) - XCTAssertEqual("http://example.org/", opmlDocument.url) - recursivelyCheckOPMLStructure(opmlDocument) + let opmlDocument = OPMLParser.document(with: subsData) + XCTAssertNotNil(opmlDocument) + + XCTAssertEqual("Subs", opmlDocument!.title) + XCTAssertEqual("http://example.org/", opmlDocument!.url) + recursivelyCheckOPMLStructure(opmlDocument!) } @@ -42,23 +43,23 @@ class OPMLTests: XCTestCase { // which appears to be true with OPML generated by The Old Reader. let d = parserData("SubsNoTitleAttributes", "opml", "http://example.org/") - let opmlDocument = try! RSOPMLParser.parseOPML(with: d) - recursivelyCheckOPMLStructure(opmlDocument) + let opmlDocument = OPMLParser.document(with: d) + recursivelyCheckOPMLStructure(opmlDocument!) 
} } private extension OPMLTests { - func recursivelyCheckOPMLStructure(_ item: RSOPMLItem) { + func recursivelyCheckOPMLStructure(_ item: OPMLItem) { let feedSpecifier = item.feedSpecifier - if !(item is RSOPMLDocument) { - XCTAssertNotNil((item.attributes! as NSDictionary).opml_text) + if !(item is OPMLDocument) { + XCTAssertNotNil(item.attributes!.opml_text) } // If it has no children, it should have a feed specifier. The converse is also true. - var isFolder = item.children != nil && item.children!.count > 0 - if !isFolder && (item.attributes! as NSDictionary).opml_title == "Skip" { + var isFolder = item.items != nil && item.items!.count > 0 + if !isFolder && item.attributes?.opml_title == "Skip" { isFolder = true } @@ -70,10 +71,17 @@ private extension OPMLTests { XCTAssertNil(feedSpecifier) } - if item.children != nil && item.children!.count > 0 { - for oneItem in item.children! { + if item.items != nil && item.items!.count > 0 { + for oneItem in item.items! { recursivelyCheckOPMLStructure(oneItem) } } } } + +func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { + let filename = "Resources/\(filename)" + let path = Bundle.module.path(forResource: filename, ofType: fileExtension)! + let data = try! 
Data(contentsOf: URL(fileURLWithPath: path)) + return ParserData(url: url, data: data) +} diff --git a/Modules/Parser/Tests/ParserTests/ParserTests.swift b/Modules/Parser/Tests/ParserTests/ParserTests.swift deleted file mode 100644 index e00240408..000000000 --- a/Modules/Parser/Tests/ParserTests/ParserTests.swift +++ /dev/null @@ -1,12 +0,0 @@ -import XCTest -@testable import Parser - -final class ParserTests: XCTestCase { - func testExample() throws { - // XCTest Documentation - // https://developer.apple.com/documentation/xctest - - // Defining Test Cases and Test Methods - // https://developer.apple.com/documentation/xctest/defining_test_cases_and_test_methods - } -} diff --git a/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift b/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift deleted file mode 100644 index 740bdf046..000000000 --- a/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift +++ /dev/null @@ -1,109 +0,0 @@ -// -// RSDateParserTests.swift -// -// -// Created by Maurice Parker on 4/1/21. -// - -import Foundation -import XCTest -import Parser - -class RSDateParserTests: XCTestCase { - - static func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int = 0) -> Date { - var dateComponents = DateComponents() - dateComponents.calendar = Calendar.current - dateComponents.timeZone = TimeZone(secondsFromGMT: 0) - - dateComponents.year = year - dateComponents.month = month - dateComponents.day = day - dateComponents.hour = hour - dateComponents.minute = minute - dateComponents.second = second - dateComponents.nanosecond = milliseconds * 1000000 - - return dateComponents.date! 
- } - - func testDateWithString() { - var expectedDateResult = Self.dateWithValues(2010, 5, 28, 21, 3, 38) - - var d = RSDateWithString("Fri, 28 May 2010 21:03:38 +0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 +00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 -00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 -0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 GMT") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38+00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38+0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38-0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38-00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38Z") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 7, 13, 17, 6, 40) - d = RSDateWithString("2010-07-13T17:06:40+00:00") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 4, 30, 12, 0, 0) - d = RSDateWithString("30 Apr 2010 5:00 PDT") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 5, 21, 21, 22, 53) - d = RSDateWithString("21 May 2010 21:22:53 GMT") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 6, 9, 5, 0, 0) - d = RSDateWithString("Wed, 09 Jun 2010 00:00 EST") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 6, 23, 3, 43, 50) - d = RSDateWithString("Wed, 23 Jun 2010 03:43:50 Z") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 6, 22, 3, 57, 49) - d = RSDateWithString("2010-06-22T03:57:49+00:00") - 
XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07) - d = RSDateWithString("2010-11-17T08:40:07-05:00") - XCTAssertEqual(d, expectedDateResult) - } - - func testAtomDateWithMissingTCharacter() { - let expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07) - let d = RSDateWithString("2010-11-17 08:40:07-05:00") - XCTAssertEqual(d, expectedDateResult) - } - - func testFeedbinDate() { - let expectedDateResult = Self.dateWithValues(2019, 9, 27, 21, 01, 48) - let d = RSDateWithString("2019-09-27T21:01:48.000000Z") - XCTAssertEqual(d, expectedDateResult) - } - - func testHighMillisecondDate() { - let expectedDateResult = Self.dateWithValues(2021, 03, 29, 10, 46, 56, 516) - let d = RSDateWithString("2021-03-29T10:46:56.516941+00:00") - XCTAssertEqual(d!.timeIntervalSince1970, expectedDateResult.timeIntervalSince1970, accuracy: 0.000001) - } -} diff --git a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift index cbfce4d13..240076af2 100644 --- a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift @@ -9,20 +9,28 @@ import XCTest import Parser -class RSSInJSONParserTests: XCTestCase { +final class RSSInJSONParserTests: XCTestCase { func testScriptingNewsPerformance() { // 0.003 sec on my 2012 iMac. let d = parserData("ScriptingNews", "json", "http://scripting.com/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } func testFeedLanguage() { let d = parserData("ScriptingNews", "json", "http://scripting.com/") - let parsedFeed = try! FeedParser.parseSync(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.language, "en-us") } } + +extension FeedParser { + + static func parse(_ parserData: ParserData) throws -> ParsedFeed? 
{ + + try FeedParser.parse(urlString: parserData.url, data: parserData.data) + } +} diff --git a/Modules/Parser/Tests/ParserTests/RSSParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSParserTests.swift index c60229ca3..62f9d0294 100644 --- a/Modules/Parser/Tests/ParserTests/RSSParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/RSSParserTests.swift @@ -9,7 +9,7 @@ import XCTest import Parser -class RSSParserTests: XCTestCase { +final class RSSParserTests: XCTestCase { func testScriptingNewsPerformance() { @@ -17,7 +17,7 @@ class RSSParserTests: XCTestCase { // 0.002 2022 Mac Studio let d = parserData("scriptingNews", "rss", "http://scripting.com/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } @@ -27,7 +27,7 @@ class RSSParserTests: XCTestCase { // 0.001 2022 Mac Studio let d = parserData("KatieFloyd", "rss", "http://katiefloyd.com/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } @@ -37,7 +37,7 @@ class RSSParserTests: XCTestCase { // 0.0004 2022 Mac Studio let d = parserData("EMarley", "rss", "https://medium.com/@emarley") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } @@ -47,21 +47,23 @@ class RSSParserTests: XCTestCase { // 0.0006 2022 Mac Studio let d = parserData("manton", "rss", "http://manton.org/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } func testNatashaTheRobot() async { let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.items.count, 10) } func testTheOmniShowAttachments() async { let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! 
+ + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNotNil(article.attachments) @@ -78,7 +80,9 @@ class RSSParserTests: XCTestCase { func testTheOmniShowUniqueIDs() async { let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNotNil(article.uniqueID) @@ -91,7 +95,7 @@ class RSSParserTests: XCTestCase { // Macworld’s feed doesn’t have guids, so they should be calculated unique IDs. let d = parserData("macworld", "rss", "https://www.macworld.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! for article in parsedFeed.items { XCTAssertNotNil(article.uniqueID) @@ -104,7 +108,9 @@ class RSSParserTests: XCTestCase { // Macworld uses names instead of email addresses (despite the RSS spec saying they should be email addresses). let d = parserData("macworld", "rss", "https://www.macworld.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { @@ -123,7 +129,9 @@ class RSSParserTests: XCTestCase { // detect this situation, and every article in the feed should have a permalink. let d = parserData("monkeydom", "rss", "https://coding.monkeydom.de/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNil(article.url) @@ -136,7 +144,9 @@ class RSSParserTests: XCTestCase { // https://github.com/brentsimmons/NetNewsWire/issues/529 let d = parserData("atp", "rss", "http://atp.fm/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! 
+ + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNotNil(article.contentHTML) @@ -145,7 +155,10 @@ class RSSParserTests: XCTestCase { func testFeedKnownToHaveGuidsThatArentPermalinks() async { let d = parserData("livemint", "xml", "https://www.livemint.com/rss/news") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) + for article in parsedFeed.items { XCTAssertNil(article.url) } @@ -155,7 +168,10 @@ class RSSParserTests: XCTestCase { // This feed uses atom authors, and we don’t want author/title to be used as item/title. // https://github.com/brentsimmons/NetNewsWire/issues/943 let d = parserData("cloudblog", "rss", "https://cloudblog.withgoogle.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) + for article in parsedFeed.items { XCTAssertNotEqual(article.title, "Product Manager, Office of the CTO") XCTAssertNotEqual(article.title, "Developer Programs Engineer") @@ -167,26 +183,29 @@ class RSSParserTests: XCTestCase { // This invalid feed has elements inside s. // 17 Jan 2021 bug report — we’re not parsing titles in this feed. let d = parserData("aktuality", "rss", "https://www.aktuality.sk/") - let parsedFeed = try! await FeedParser.parse(d)! - for article in parsedFeed.items { + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) + + for article in parsedFeed.items { XCTAssertNotNil(article.title) } } func testFeedLanguage() async { let d = parserData("manton", "rss", "http://manton.org/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.language, "en-US") } -// func testFeedWithGB2312Encoding() { -// // This feed has an encoding we don’t run into very often. 
-// // https://github.com/Ranchero-Software/NetNewsWire/issues/1477 -// let d = parserData("kc0011", "rss", "http://kc0011.net/") -// let parsedFeed = try! FeedParser.parse(d)! -// XCTAssert(parsedFeed.items.count > 0) -// for article in parsedFeed.items { -// XCTAssertNotNil(article.contentHTML) -// } -// } + func testFeedWithGB2312Encoding() { + // This feed has an encoding we don’t run into very often. + // https://github.com/Ranchero-Software/NetNewsWire/issues/1477 + let d = parserData("kc0011", "rss", "http://kc0011.net/") + let parsedFeed = try! FeedParser.parse(d)! + XCTAssert(parsedFeed.items.count > 0) + for article in parsedFeed.items { + XCTAssertNotNil(article.contentHTML) + } + } } diff --git a/Modules/ParserObjC/.gitignore b/Modules/ParserObjC/.gitignore deleted file mode 100644 index 0023a5340..000000000 --- a/Modules/ParserObjC/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.DS_Store -/.build -/Packages -xcuserdata/ -DerivedData/ -.swiftpm/configuration/registries.json -.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata -.netrc diff --git a/Modules/ParserObjC/Package.swift b/Modules/ParserObjC/Package.swift deleted file mode 100644 index 69281ca1d..000000000 --- a/Modules/ParserObjC/Package.swift +++ /dev/null @@ -1,26 +0,0 @@ -// swift-tools-version: 5.10 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "ParserObjC", - platforms: [.macOS(.v14), .iOS(.v17)], - products: [ - // Products define the executables and libraries a package produces, making them visible to other packages. - .library( - name: "ParserObjC", - type: .dynamic, - targets: ["ParserObjC"]), - ], - targets: [ - // Targets are the basic building blocks of a package, defining a module or a test suite. - // Targets can depend on other targets in this package and products from dependencies. 
- .target( - name: "ParserObjC", - cSettings: [ - .headerSearchPath("include") - ] - ), - ] -) diff --git a/Modules/ParserObjC/Sources/ParserObjC/FeedParser.h b/Modules/ParserObjC/Sources/ParserObjC/FeedParser.h deleted file mode 100755 index 0f8df6b07..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/FeedParser.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// FeedParser.h -// RSXML -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -@class RSParsedFeed; -@class RSXMLData; - - -@protocol FeedParser - -+ (BOOL)canParseFeed:(RSXMLData * _Nonnull)xmlData; - -- (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData; - -- (nullable RSParsedFeed *)parseFeed:(NSError * _Nullable * _Nullable)error; - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h b/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h deleted file mode 100644 index be2d892f8..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h +++ /dev/null @@ -1,26 +0,0 @@ -// -// NSData+RSParser.h -// RSParser -// -// Created by Brent Simmons on 6/24/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@interface NSData (RSParser) - -- (BOOL)isProbablyHTML; -- (BOOL)isProbablyXML; -- (BOOL)isProbablyJSON; - -- (BOOL)isProbablyJSONFeed; -- (BOOL)isProbablyRSSInJSON; -- (BOOL)isProbablyRSS; -- (BOOL)isProbablyAtom; - -@end - - - diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m b/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m deleted file mode 100644 index 8ac9aa167..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m +++ /dev/null @@ -1,139 +0,0 @@ -// -// NSData+RSParser.m -// RSParser -// -// Created by Brent Simmons on 6/24/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -#import "NSData+RSParser.h" - - - - -/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/ - -static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes); -static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes); - -@implementation NSData (RSParser) - -- (BOOL)isProbablyHTML { - - return bytesAreProbablyHTML(self.bytes, self.length); -} - -- (BOOL)isProbablyXML { - - return bytesAreProbablyXML(self.bytes, self.length); -} - -- (BOOL)isProbablyJSON { - - return bytesStartWithStringIgnoringWhitespace("{", self.bytes, self.length); -} - -- (BOOL)isProbablyJSONFeed { - - if (![self isProbablyJSON]) { - return NO; - } - return didFindString("://jsonfeed.org/version/", self.bytes, self.length) || didFindString(":\\/\\/jsonfeed.org\\/version\\/", self.bytes, self.length); -} - -- (BOOL)isProbablyRSSInJSON { - - if (![self isProbablyJSON]) { - return NO; - } - const char *bytes = self.bytes; - NSUInteger length = self.length; - return didFindString("rss", bytes, length) && didFindString("channel", bytes, length) && didFindString("item", bytes, length); -} - -- (BOOL)isProbablyRSS { - - if (didFindString(" tag, but it should be parsed anyway. It does have some other distinct RSS markers we can find. - return (didFindString("", self.bytes, self.length) && didFindString("", self.bytes, self.length)); -} - -- (BOOL)isProbablyAtom { - - return didFindString(", and & entity-encoded. 
-@property (readonly, copy) NSString *rsparser_stringByEncodingRequiredEntities; - -- (NSString *)rsparser_md5Hash; - -- (BOOL)rsparser_contains:(NSString *)s; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m b/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m deleted file mode 100755 index 8a4e7d114..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m +++ /dev/null @@ -1,348 +0,0 @@ -// -// NSString+RSParser.m -// RSParser -// -// Created by Brent Simmons on 9/25/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -#import "NSString+RSParser.h" -#import - - - - -@interface NSScanner (RSParser) - -- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity; - -@end - - -@implementation NSString (RSParser) - -- (BOOL)rsparser_contains:(NSString *)s { - - return [self rangeOfString:s].location != NSNotFound; -} - -- (NSString *)rsparser_stringByDecodingHTMLEntities { - - @autoreleasepool { - - NSScanner *scanner = [[NSScanner alloc] initWithString:self]; - scanner.charactersToBeSkipped = nil; - NSMutableString *result = [[NSMutableString alloc] init]; - - while (true) { - - NSString *scannedString = nil; - if ([scanner scanUpToString:@"&" intoString:&scannedString]) { - [result appendString:scannedString]; - } - if (scanner.isAtEnd) { - break; - } - NSUInteger savedScanLocation = scanner.scanLocation; - - NSString *decodedEntity = nil; - if ([scanner rs_scanEntityValue:&decodedEntity]) { - [result appendString:decodedEntity]; - } - else { - [result appendString:@"&"]; - scanner.scanLocation = savedScanLocation + 1; - } - - if (scanner.isAtEnd) { - break; - } - } - - if ([self isEqualToString:result]) { - return self; - } - return [result copy]; - } -} - - -static NSDictionary *RSEntitiesDictionary(void); -static NSString *RSParserStringWithValue(uint32_t value); - -- (NSString * _Nullable)rs_stringByDecodingEntity { - - // self may or may not 
have outer & and ; characters. - - NSMutableString *s = [self mutableCopy]; - - if ([s hasPrefix:@"&"]) { - [s deleteCharactersInRange:NSMakeRange(0, 1)]; - } - if ([s hasSuffix:@";"]) { - [s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)]; - } - - NSDictionary *entitiesDictionary = RSEntitiesDictionary(); - - NSString *decodedEntity = entitiesDictionary[self]; - if (decodedEntity) { - return decodedEntity; - } - - if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex - NSScanner *scanner = [[NSScanner alloc] initWithString:s]; - scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"]; - unsigned int hexValue = 0; - if ([scanner scanHexInt:&hexValue]) { - return RSParserStringWithValue((uint32_t)hexValue); - } - return nil; - } - - else if ([s hasPrefix:@"#"]) { - [s deleteCharactersInRange:NSMakeRange(0, 1)]; - NSInteger value = s.integerValue; - if (value < 1) { - return nil; - } - return RSParserStringWithValue((uint32_t)value); - } - - return nil; -} - -- (NSString *)rsparser_stringByEncodingRequiredEntities { - NSMutableString *result = [NSMutableString string]; - - for (NSUInteger i = 0; i < self.length; ++i) { - unichar c = [self characterAtIndex:i]; - - switch (c) { - case '<': - [result appendString:@"<"]; - break; - case '>': - [result appendString:@">"]; - break; - case '&': - [result appendString:@"&"]; - break; - default: - [result appendFormat:@"%C", c]; - break; - } - } - - return [result copy]; -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -- (NSData *)_rsparser_md5HashData { - - NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding]; - unsigned char hash[CC_MD5_DIGEST_LENGTH]; - CC_MD5(data.bytes, (CC_LONG)data.length, hash); - - return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH]; -} -#pragma GCC diagnostic pop - -- (NSString *)rsparser_md5Hash { - - NSData *md5Data = [self _rsparser_md5HashData]; - const Byte *bytes = 
md5Data.bytes; - return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]]; -} - - -@end - -@implementation NSScanner (RSParser) - -- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { - - NSString *s = self.string; - NSUInteger initialScanLocation = self.scanLocation; - static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. - - while (true) { - - unichar ch = [s characterAtIndex:self.scanLocation]; - if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { - break; - } - if (ch == ';') { - if (!decodedEntity) { - return YES; - } - NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)]; - *decodedEntity = [rawEntity rs_stringByDecodingEntity]; - self.scanLocation = self.scanLocation + 1; - return *decodedEntity != nil; - } - - self.scanLocation = self.scanLocation + 1; - if (self.scanLocation - initialScanLocation > maxEntityLength) { - break; - } - if (self.isAtEnd) { - break; - } - } - - return NO; -} - -@end - -static NSString *RSParserStringWithValue(uint32_t value) { - // From WebCore's HTMLEntityParser - static const uint32_t windowsLatin1ExtensionArray[32] = { - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F - }; - - if ((value & ~0x1Fu) == 0x80u) { // value >= 128 && value < 160 - value = windowsLatin1ExtensionArray[value - 0x80]; - } - - value = CFSwapInt32HostToLittle(value); - - return [[NSString alloc] initWithBytes:&value length:sizeof(value) 
encoding:NSUTF32LittleEndianStringEncoding]; -} - -static NSDictionary *RSEntitiesDictionary(void) { - - static NSDictionary *entitiesDictionary = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - - entitiesDictionary = @{ - // Named entities - @"AElig": @"Æ", - @"Aacute": @"Á", - @"Acirc": @"Â", - @"Agrave": @"À", - @"Aring": @"Å", - @"Atilde": @"Ã", - @"Auml": @"Ä", - @"Ccedil": @"Ç", - @"Dstrok": @"Ð", - @"ETH": @"Ð", - @"Eacute": @"É", - @"Ecirc": @"Ê", - @"Egrave": @"È", - @"Euml": @"Ë", - @"Iacute": @"Í", - @"Icirc": @"Î", - @"Igrave": @"Ì", - @"Iuml": @"Ï", - @"Ntilde": @"Ñ", - @"Oacute": @"Ó", - @"Ocirc": @"Ô", - @"Ograve": @"Ò", - @"Oslash": @"Ø", - @"Otilde": @"Õ", - @"Ouml": @"Ö", - @"Pi": @"Π", - @"THORN": @"Þ", - @"Uacute": @"Ú", - @"Ucirc": @"Û", - @"Ugrave": @"Ù", - @"Uuml": @"Ü", - @"Yacute": @"Y", - @"aacute": @"á", - @"acirc": @"â", - @"acute": @"´", - @"aelig": @"æ", - @"agrave": @"à", - @"amp": @"&", - @"apos": @"'", - @"aring": @"å", - @"atilde": @"ã", - @"auml": @"ä", - @"brkbar": @"¦", - @"brvbar": @"¦", - @"ccedil": @"ç", - @"cedil": @"¸", - @"cent": @"¢", - @"copy": @"©", - @"curren": @"¤", - @"deg": @"°", - @"die": @"¨", - @"divide": @"÷", - @"eacute": @"é", - @"ecirc": @"ê", - @"egrave": @"è", - @"eth": @"ð", - @"euml": @"ë", - @"euro": @"€", - @"frac12": @"½", - @"frac14": @"¼", - @"frac34": @"¾", - @"gt": @">", - @"hearts": @"♥", - @"hellip": @"…", - @"iacute": @"í", - @"icirc": @"î", - @"iexcl": @"¡", - @"igrave": @"ì", - @"iquest": @"¿", - @"iuml": @"ï", - @"laquo": @"«", - @"ldquo": @"“", - @"lsquo": @"‘", - @"lt": @"<", - @"macr": @"¯", - @"mdash": @"—", - @"micro": @"µ", - @"middot": @"·", - @"ndash": @"–", - @"not": @"¬", - @"ntilde": @"ñ", - @"oacute": @"ó", - @"ocirc": @"ô", - @"ograve": @"ò", - @"ordf": @"ª", - @"ordm": @"º", - @"oslash": @"ø", - @"otilde": @"õ", - @"ouml": @"ö", - @"para": @"¶", - @"pi": @"π", - @"plusmn": @"±", - @"pound": @"£", - @"quot": @"\"", - @"raquo": @"»", - @"rdquo": @"”", - 
@"reg": @"®", - @"rsquo": @"’", - @"sect": @"§", - @"shy": RSParserStringWithValue(173), - @"sup1": @"¹", - @"sup2": @"²", - @"sup3": @"³", - @"szlig": @"ß", - @"thorn": @"þ", - @"times": @"×", - @"trade": @"™", - @"uacute": @"ú", - @"ucirc": @"û", - @"ugrave": @"ù", - @"uml": @"¨", - @"uuml": @"ü", - @"yacute": @"y", - @"yen": @"¥", - @"yuml": @"ÿ", - @"infin": @"∞", - @"nbsp": RSParserStringWithValue(160) - }; - }); - - return entitiesDictionary; -} diff --git a/Modules/ParserObjC/Sources/ParserObjC/ParserData.h b/Modules/ParserObjC/Sources/ParserObjC/ParserData.h deleted file mode 100644 index fe4885144..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/ParserData.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// ParserData.h -// RSParser -// -// Created by Brent Simmons on 10/4/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -__attribute__((swift_attr("@Sendable"))) -@interface ParserData : NSObject - -@property (nonatomic, readonly) NSString *url; -@property (nonatomic, readonly) NSData *data; - -- (instancetype)initWithURL:(NSString *)url data:(NSData *)data; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/ParserData.m b/Modules/ParserObjC/Sources/ParserObjC/ParserData.m deleted file mode 100644 index 68c5f0356..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/ParserData.m +++ /dev/null @@ -1,26 +0,0 @@ -// -// ParserData.m -// RSParser -// -// Created by Brent Simmons on 10/4/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -#import "ParserData.h" - -@implementation ParserData - -- (instancetype)initWithURL:(NSString *)url data:(NSData *)data { - - self = [super init]; - if (!self) { - return nil; - } - - _url = url; - _data = data; - - return self; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h deleted file mode 100755 index 27b5d80e4..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h +++ /dev/null @@ -1,18 +0,0 @@ -// -// RSAtomParser.h -// RSParser -// -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class ParserData; -@class RSParsedFeed; - -@interface RSAtomParser : NSObject - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m deleted file mode 100755 index eaaeeb638..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m +++ /dev/null @@ -1,679 +0,0 @@ -// -// RSAtomParser.m -// RSParser -// -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. 
-// - - -#import "RSAtomParser.h" -#import "RSSAXParser.h" -#import "RSParsedFeed.h" -#import "RSParsedArticle.h" -#import "NSString+RSParser.h" -#import "RSDateParser.h" -#import "ParserData.h" -#import "RSParsedEnclosure.h" -#import "RSParsedAuthor.h" - -#import - -@interface RSAtomParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) BOOL endFeedFound; -@property (nonatomic) BOOL parsingXHTML; -@property (nonatomic) BOOL parsingSource; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic) BOOL parsingAuthor; -@property (nonatomic) NSMutableArray *attributesStack; -@property (nonatomic, readonly) NSDictionary *currentAttributes; -@property (nonatomic) NSMutableString *xhtmlString; -@property (nonatomic) NSString *link; -@property (nonatomic) NSString *title; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) NSDate *dateParsed; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic) RSParsedAuthor *currentAuthor; -@property (nonatomic, readonly) NSDate *currentDate; -@property (nonatomic) NSString *language; - -@end - - -@implementation RSAtomParser - -#pragma mark - Class Methods - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData { - - RSAtomParser *parser = [[[self class] alloc] initWithParserData:parserData]; - return [parser parseFeed]; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - _feedData = parserData.data; - _urlString = parserData.url; - _parser = [[RSSAXParser alloc] initWithDelegate:self]; - _attributesStack = [NSMutableArray new]; - _articles = [NSMutableArray new]; - - return self; -} - - -#pragma mark - API - -- (RSParsedFeed *)parseFeed { - - [self parse]; - - RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString 
title:self.title link:self.link language:self.language articles:self.articles]; - - return parsedFeed; -} - - -#pragma mark - Constants - -static NSString *kTypeKey = @"type"; -static NSString *kXHTMLType = @"xhtml"; -static NSString *kRelKey = @"rel"; -static NSString *kAlternateValue = @"alternate"; -static NSString *kHrefKey = @"href"; -static NSString *kXMLKey = @"xml"; -static NSString *kBaseKey = @"base"; -static NSString *kLangKey = @"lang"; -static NSString *kXMLBaseKey = @"xml:base"; -static NSString *kXMLLangKey = @"xml:lang"; -static NSString *kTextHTMLValue = @"text/html"; -static NSString *kRelatedValue = @"related"; -static NSString *kEnclosureValue = @"enclosure"; -static NSString *kShortURLValue = @"shorturl"; -static NSString *kHTMLValue = @"html"; -static NSString *kEnValue = @"en"; -static NSString *kTextValue = @"text"; -static NSString *kSelfValue = @"self"; -static NSString *kLengthKey = @"length"; -static NSString *kTitleKey = @"title"; - -static const char *kID = "id"; -static const NSInteger kIDLength = 3; - -static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kContent = "content"; -static const NSInteger kContentLength = 8; - -static const char *kSummary = "summary"; -static const NSInteger kSummaryLength = 8; - -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; - -static const char *kPublished = "published"; -static const NSInteger kPublishedLength = 10; - -static const char *kIssued = "issued"; -static const NSInteger kIssuedLength = 7; - -static const char *kUpdated = "updated"; -static const NSInteger kUpdatedLength = 8; - -static const char *kModified = "modified"; -static const NSInteger kModifiedLength = 9; - -static const char *kAuthor = "author"; -static const NSInteger kAuthorLength = 7; - -static const char *kName = "name"; -static const NSInteger kNameLength = 5; - -static const char *kEmail = "email"; -static const NSInteger kEmailLength = 6; - -static 
const char *kURI = "uri"; -static const NSInteger kURILength = 4; - -static const char *kEntry = "entry"; -static const NSInteger kEntryLength = 6; - -static const char *kSource = "source"; -static const NSInteger kSourceLength = 7; - -static const char *kFeed = "feed"; -static const NSInteger kFeedLength = 5; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kRel = "rel"; -static const NSInteger kRelLength = 4; - -static const char *kAlternate = "alternate"; -static const NSInteger kAlternateLength = 10; - -static const char *kHref = "href"; -static const NSInteger kHrefLength = 5; - -static const char *kXML = "xml"; -static const NSInteger kXMLLength = 4; - -static const char *kBase = "base"; -static const NSInteger kBaseLength = 5; - -static const char *kLang = "lang"; -static const NSInteger kLangLength = 5; - -static const char *kTextHTML = "text/html"; -static const NSInteger kTextHTMLLength = 10; - -static const char *kRelated = "related"; -static const NSInteger kRelatedLength = 8; - -static const char *kShortURL = "shorturl"; -static const NSInteger kShortURLLength = 9; - -static const char *kHTML = "html"; -static const NSInteger kHTMLLength = 5; - -static const char *kEn = "en"; -static const NSInteger kEnLength = 3; - -static const char *kText = "text"; -static const NSInteger kTextLength = 5; - -static const char *kSelf = "self"; -static const NSInteger kSelfLength = 5; - -static const char *kEnclosure = "enclosure"; -static const NSInteger kEnclosureLength = 10; - -static const char *kLength = "length"; -static const NSInteger kLengthLength = 7; - -#pragma mark - Parsing - -- (void)parse { - - self.dateParsed = [NSDate date]; - - @autoreleasepool { - [self.parser parseData:self.feedData]; - [self.parser finishParsing]; - } -} - - -- (void)addArticle { - - RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; - article.dateParsed = self.dateParsed; - - [self.articles 
addObject:article]; -} - - -- (RSParsedArticle *)currentArticle { - - return self.articles.lastObject; -} - - -- (NSDictionary *)currentAttributes { - - return self.attributesStack.lastObject; -} - - -- (NSDate *)currentDate { - - return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); -} - - -- (void)addFeedLink { - - if (self.link && self.link.length > 0) { - return; - } - - NSString *related = self.currentAttributes[kRelKey]; - if (related == kAlternateValue) { - self.link = self.currentAttributes[kHrefKey]; - } -} - - -- (void)addFeedTitle { - - if (self.title.length < 1) { - self.title = [self currentString]; - } -} - -- (void)addFeedLanguage { - - if (self.language.length < 0) { - self.language = self.currentAttributes[kXMLLangKey] -; - } -} - -- (void)addLink { - - NSDictionary *attributes = self.currentAttributes; - - NSString *urlString = attributes[kHrefKey]; - if (urlString.length < 1) { - return; - } - - RSParsedArticle *article = self.currentArticle; - - NSString *rel = attributes[kRelKey]; - if (rel.length < 1) { - rel = kAlternateValue; - } - - if (rel == kRelatedValue) { - if (!article.link) { - article.link = urlString; - } - } - else if (rel == kAlternateValue) { - if (!article.permalink) { - article.permalink = urlString; - } - } - else if (rel == kEnclosureValue) { - RSParsedEnclosure *enclosure = [self enclosureWithURLString:urlString attributes:attributes]; - [article addEnclosure:enclosure]; - } -} - -- (RSParsedEnclosure *)enclosureWithURLString:(NSString *)urlString attributes:(NSDictionary *)attributes { - - RSParsedEnclosure *enclosure = [[RSParsedEnclosure alloc] init]; - enclosure.url = urlString; - enclosure.title = attributes[kTitleKey]; - enclosure.mimeType = attributes[kTypeKey]; - enclosure.length = [attributes[kLengthKey] integerValue]; - - return enclosure; -} - -- (void)addContent { - - self.currentArticle.body = [self currentString]; -} - - -- (void)addSummary { - - if 
(!self.currentArticle.body) { - self.currentArticle.body = [self currentString]; - } -} - - -- (NSString *)currentString { - - return self.parser.currentStringWithTrimmedWhitespace; -} - - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix) { - return; - } - - if (RSSAXEqualTags(localName, kID, kIDLength)) { - self.currentArticle.guid = [self currentString]; - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.currentArticle.title = [self currentString]; - } - - else if (RSSAXEqualTags(localName, kContent, kContentLength)) { - [self addContent]; - } - - else if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) { - [self addSummary]; - } - - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addLink]; - } - - else if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) { - self.currentArticle.datePublished = self.currentDate; - } - - else if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) { - self.currentArticle.dateModified = self.currentDate; - } - - // Atom 0.3 dates - else if (RSSAXEqualTags(localName, kIssued, kIssuedLength)) { - if (!self.currentArticle.datePublished) { - self.currentArticle.datePublished = self.currentDate; - } - } - else if (RSSAXEqualTags(localName, kModified, kModifiedLength)) { - if (!self.currentArticle.dateModified) { - self.currentArticle.dateModified = self.currentDate; - } - } -} - - -- (void)addXHTMLTag:(const xmlChar *)localName { - - if (!localName) { - return; - } - - [self.xhtmlString appendString:@"<"]; - [self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]]; - - if (self.currentAttributes.count < 1) { - [self.xhtmlString appendString:@">"]; - return; - } - - for (NSString *oneKey in self.currentAttributes) { - - [self.xhtmlString appendString:@" "]; - - NSString *oneValue = self.currentAttributes[oneKey]; - [self.xhtmlString appendString:oneKey]; - - [self.xhtmlString appendString:@"=\""]; - 
- oneValue = [oneValue stringByReplacingOccurrencesOfString:@"\"" withString:@"""]; - [self.xhtmlString appendString:oneValue]; - - [self.xhtmlString appendString:@"\""]; - } - - [self.xhtmlString appendString:@">"]; -} - - -#pragma mark - RSSAXParserDelegate - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (self.endFeedFound) { - return; - } - - NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - if (!xmlAttributes) { - xmlAttributes = [NSDictionary dictionary]; - } - [self.attributesStack addObject:xmlAttributes]; - - if (self.parsingXHTML) { - [self addXHTMLTag:localName]; - return; - } - - if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = YES; - [self addArticle]; - return; - } - - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = YES; - self.currentAuthor = [[RSParsedAuthor alloc] init]; - return; - } - - if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = YES; - return; - } - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - if (self.parsingArticle && (isContentTag || isSummaryTag)) { - - if (isContentTag) { - self.currentArticle.language = xmlAttributes[kXMLLangKey]; - } - - NSString *contentType = xmlAttributes[kTypeKey]; - if ([contentType isEqualToString:kXHTMLType]) { - self.parsingXHTML = YES; - self.xhtmlString = [NSMutableString stringWithString:@""]; - return; - } - } - - if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addFeedLink]; - return; - } - - 
if (RSSAXEqualTags(localName, kFeed, kFeedLength)) { - [self addFeedLanguage]; - } - - [self.parser beginStoringCharacters]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (RSSAXEqualTags(localName, kFeed, kFeedLength)) { - self.endFeedFound = YES; - return; - } - - if (self.endFeedFound) { - return; - } - - if (self.parsingXHTML) { - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - - if (self.parsingArticle && (isContentTag || isSummaryTag)) { - - if (isContentTag) { - self.currentArticle.body = [self.xhtmlString copy]; - } - - else if (isSummaryTag) { - if (self.currentArticle.body.length < 1) { - self.currentArticle.body = [self.xhtmlString copy]; - } - } - } - - if (isContentTag || isSummaryTag) { - self.parsingXHTML = NO; - } - - [self.xhtmlString appendString:@""]; - } - - else if (self.parsingAuthor) { - - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = NO; - RSParsedAuthor *author = self.currentAuthor; - if (author.name || author.emailAddress || author.url) { - [self.currentArticle addAuthor:author]; - } - self.currentAuthor = nil; - } - else if (RSSAXEqualTags(localName, kName, kNameLength)) { - self.currentAuthor.name = [self currentString]; - } - else if (RSSAXEqualTags(localName, kEmail, kEmailLength)) { - self.currentAuthor.emailAddress = [self currentString]; - } - else if (RSSAXEqualTags(localName, kURI, kURILength)) { - self.currentAuthor.url = [self currentString]; - } - } - - else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle && !self.parsingSource) { - [self addArticleElement:localName prefix:prefix]; - } - - else if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = NO; - } - - else if (!self.parsingArticle 
&& !self.parsingSource && RSSAXEqualTags(localName, kTitle, kTitleLength)) { - [self addFeedTitle]; - } - - [self.attributesStack removeLastObject]; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (prefix && RSSAXEqualTags(prefix, kXML, kXMLLength)) { - - if (RSSAXEqualTags(name, kBase, kBaseLength)) { - return kXMLBaseKey; - } - if (RSSAXEqualTags(name, kLang, kLangLength)) { - return kXMLLangKey; - } - } - - if (prefix) { - return nil; - } - - if (RSSAXEqualTags(name, kRel, kRelLength)) { - return kRelKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - - if (RSSAXEqualTags(name, kHref, kHrefLength)) { - return kHrefKey; - } - - if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) { - return kAlternateValue; - } - - if (RSSAXEqualTags(name, kLength, kLengthLength)) { - return kLengthKey; - } - - if (RSSAXEqualTags(name, kTitle, kTitleLength)) { - return kTitleKey; - } - - return nil; -} - - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - - static const NSUInteger alternateLength = kAlternateLength - 1; - static const NSUInteger textHTMLLength = kTextHTMLLength - 1; - static const NSUInteger relatedLength = kRelatedLength - 1; - static const NSUInteger shortURLLength = kShortURLLength - 1; - static const NSUInteger htmlLength = kHTMLLength - 1; - static const NSUInteger enLength = kEnLength - 1; - static const NSUInteger textLength = kTextLength - 1; - static const NSUInteger selfLength = kSelfLength - 1; - static const NSUInteger enclosureLength = kEnclosureLength - 1; - - if (length == alternateLength && equalBytes(bytes, kAlternate, alternateLength)) { - return kAlternateValue; - } - - if (length == enclosureLength && 
equalBytes(bytes, kEnclosure, enclosureLength)) { - return kEnclosureValue; - } - - if (length == textHTMLLength && equalBytes(bytes, kTextHTML, textHTMLLength)) { - return kTextHTMLValue; - } - - if (length == relatedLength && equalBytes(bytes, kRelated, relatedLength)) { - return kRelatedValue; - } - - if (length == shortURLLength && equalBytes(bytes, kShortURL, shortURLLength)) { - return kShortURLValue; - } - - if (length == htmlLength && equalBytes(bytes, kHTML, htmlLength)) { - return kHTMLValue; - } - - if (length == enLength && equalBytes(bytes, kEn, enLength)) { - return kEnValue; - } - - if (length == textLength && equalBytes(bytes, kText, textLength)) { - return kTextValue; - } - - if (length == selfLength && equalBytes(bytes, kSelf, selfLength)) { - return kSelfValue; - } - - return nil; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length { - - if (self.parsingXHTML) { - NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)characters length:length encoding:NSUTF8StringEncoding freeWhenDone:NO]; - if (s == nil) { - return; - } - // libxml decodes all entities; we need to re-encode certain characters - // (<, >, and &) when inside XHTML text content. - [self.xhtmlString appendString:s.rsparser_stringByEncodingRequiredEntities]; - } -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h deleted file mode 100755 index 5c3745a32..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h +++ /dev/null @@ -1,22 +0,0 @@ -// -// RSDateParser.h -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -// Common web dates -- RFC 822 and 8601 -- are handled here: the formats you find in JSON and XML feeds. -// These may return nil. They may also return garbage, given bad input. 
- -NSDate *RSDateWithString(NSString *dateString); - -// If you're using a SAX parser, you have the bytes and don't need to convert to a string first. -// It's faster and uses less memory. -// (Assumes bytes are UTF-8 or ASCII. If you're using the libxml SAX parser, this will work.) - -NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes); - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m deleted file mode 100755 index 3eced1170..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m +++ /dev/null @@ -1,464 +0,0 @@ -// -// RSDateParser.m -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSDateParser.h" -#import - - -typedef struct { - const char *abbreviation; - const NSInteger offsetHours; - const NSInteger offsetMinutes; -} RSTimeZoneAbbreviationAndOffset; - - -#define kNumberOfTimeZones 96 - -static const RSTimeZoneAbbreviationAndOffset timeZoneTable[kNumberOfTimeZones] = { - {"GMT", 0, 0}, //Most common at top, for performance - {"PDT", -7, 0}, {"PST", -8, 0}, {"EST", -5, 0}, {"EDT", -4, 0}, - {"MDT", -6, 0}, {"MST", -7, 0}, {"CST", -6, 0}, {"CDT", -5, 0}, - {"ACT", -8, 0}, {"AFT", 4, 30}, {"AMT", 4, 0}, {"ART", -3, 0}, - {"AST", 3, 0}, {"AZT", 4, 0}, {"BIT", -12, 0}, {"BDT", 8, 0}, - {"ACST", 9, 30}, {"AEST", 10, 0}, {"AKST", -9, 0}, {"AMST", 5, 0}, - {"AWST", 8, 0}, {"AZOST", -1, 0}, {"BIOT", 6, 0}, {"BRT", -3, 0}, - {"BST", 6, 0}, {"BTT", 6, 0}, {"CAT", 2, 0}, {"CCT", 6, 30}, - {"CET", 1, 0}, {"CEST", 2, 0}, {"CHAST", 12, 45}, {"ChST", 10, 0}, - {"CIST", -8, 0}, {"CKT", -10, 0}, {"CLT", -4, 0}, {"CLST", -3, 0}, - {"COT", -5, 0}, {"COST", -4, 0}, {"CVT", -1, 0}, {"CXT", 7, 0}, - {"EAST", -6, 0}, {"EAT", 3, 0}, {"ECT", -4, 0}, {"EEST", 3, 0}, - {"EET", 2, 0}, {"FJT", 12, 0}, {"FKST", -4, 0}, {"GALT", -6, 0}, - {"GET", 4, 0}, {"GFT", -3, 0}, {"GILT", 7, 0}, {"GIT", -9, 
0}, - {"GST", -2, 0}, {"GYT", -4, 0}, {"HAST", -10, 0}, {"HKT", 8, 0}, - {"HMT", 5, 0}, {"IRKT", 8, 0}, {"IRST", 3, 30}, {"IST", 2, 0}, - {"JST", 9, 0}, {"KRAT", 7, 0}, {"KST", 9, 0}, {"LHST", 10, 30}, - {"LINT", 14, 0}, {"MAGT", 11, 0}, {"MIT", -9, 30}, {"MSK", 3, 0}, - {"MUT", 4, 0}, {"NDT", -2, 30}, {"NFT", 11, 30}, {"NPT", 5, 45}, - {"NT", -3, 30}, {"OMST", 6, 0}, {"PETT", 12, 0}, {"PHOT", 13, 0}, - {"PKT", 5, 0}, {"RET", 4, 0}, {"SAMT", 4, 0}, {"SAST", 2, 0}, - {"SBT", 11, 0}, {"SCT", 4, 0}, {"SLT", 5, 30}, {"SST", 8, 0}, - {"TAHT", -10, 0}, {"THA", 7, 0}, {"UYT", -3, 0}, {"UYST", -2, 0}, - {"VET", -4, 30}, {"VLAT", 10, 0}, {"WAT", 1, 0}, {"WET", 0, 0}, - {"WEST", 1, 0}, {"YAKT", 9, 0}, {"YEKT", 5, 0} -}; /*See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list*/ - - - -#pragma mark - Parser - -enum { - RSJanuary = 1, - RSFebruary, - RSMarch, - RSApril, - RSMay, - RSJune, - RSJuly, - RSAugust, - RSSeptember, - RSOctober, - RSNovember, - RSDecember -}; - -static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger *finalIndex) { - - /*Months are 1-based -- January is 1, Dec is 12. - Lots of short-circuits here. Not strict. 
GIGO.*/ - - NSUInteger i;// = startingIndex; - NSUInteger numberOfAlphaCharactersFound = 0; - char monthCharacters[3] = {0, 0, 0}; - - for (i = startingIndex; i < numberOfBytes; i++) { - - *finalIndex = i; - char character = bytes[i]; - - BOOL isAlphaCharacter = (BOOL)isalpha(character); - if (!isAlphaCharacter && numberOfAlphaCharactersFound < 1) - continue; - if (!isAlphaCharacter && numberOfAlphaCharactersFound > 0) - break; - - numberOfAlphaCharactersFound++; - if (numberOfAlphaCharactersFound == 1) { - if (character == 'F' || character == 'f') - return RSFebruary; - if (character == 'S' || character == 's') - return RSSeptember; - if (character == 'O' || character == 'o') - return RSOctober; - if (character == 'N' || character == 'n') - return RSNovember; - if (character == 'D' || character == 'd') - return RSDecember; - } - - monthCharacters[numberOfAlphaCharactersFound - 1] = character; - if (numberOfAlphaCharactersFound >=3) - break; - } - - if (numberOfAlphaCharactersFound < 2) - return NSNotFound; - - if (monthCharacters[0] == 'J' || monthCharacters[0] == 'j') { //Jan, Jun, Jul - if (monthCharacters[1] == 'a' || monthCharacters[1] == 'A') - return RSJanuary; - if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') { - if (monthCharacters[2] == 'n' || monthCharacters[2] == 'N') - return RSJune; - return RSJuly; - } - return RSJanuary; - } - - if (monthCharacters[0] == 'M' || monthCharacters[0] == 'm') { //March, May - if (monthCharacters[2] == 'y' || monthCharacters[2] == 'Y') - return RSMay; - return RSMarch; - } - - if (monthCharacters[0] == 'A' || monthCharacters[0] == 'a') { //April, August - if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') - return RSAugust; - return RSApril; - } - - return RSJanuary; //should never get here -} - - -static NSInteger nextNumericValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger maximumNumberOfDigits, NSUInteger *finalIndex) { - - /*maximumNumberOfDigits has a maximum 
limit of 4 (for time zone offsets and years). - *finalIndex will be the index of the last character looked at.*/ - - if (maximumNumberOfDigits > 4) - maximumNumberOfDigits = 4; - - NSUInteger i = 0; - NSUInteger numberOfDigitsFound = 0; - NSInteger digits[4] = {0, 0, 0, 0}; - - for (i = startingIndex; i < numberOfBytes; i++) { - *finalIndex = i; - BOOL isDigit = (BOOL)isdigit(bytes[i]); - if (!isDigit && numberOfDigitsFound < 1) - continue; - if (!isDigit && numberOfDigitsFound > 0) - break; - digits[numberOfDigitsFound] = bytes[i] - 48; // '0' is 48 - numberOfDigitsFound++; - if (numberOfDigitsFound >= maximumNumberOfDigits) - break; - } - - if (numberOfDigitsFound < 1) - return NSNotFound; - if (numberOfDigitsFound == 1) - return digits[0]; - if (numberOfDigitsFound == 2) - return (digits[0] * 10) + digits[1]; - if (numberOfDigitsFound == 3) - return (digits[0] * 100) + (digits[1] * 10) + digits[2]; - return (digits[0] * 1000) + (digits[1] * 100) + (digits[2] * 10) + digits[3]; -} - - -static BOOL hasAtLeastOneAlphaCharacter(const char *s) { - - NSUInteger length = strlen(s); - NSUInteger i = 0; - - for (i = 0; i < length; i++) { - if (isalpha(s[i])) - return YES; - } - - return NO; -} - - -#pragma mark - Time Zones and offsets - -static NSInteger offsetInSecondsForTimeZoneAbbreviation(const char *abbreviation) { - - /*Linear search should be fine. It's a C array, and short (under 100 items). - Most common time zones are at the beginning of the array. 
(We can tweak this as needed.)*/ - - NSUInteger i; - - for (i = 0; i < kNumberOfTimeZones; i++) { - - RSTimeZoneAbbreviationAndOffset zone = timeZoneTable[i]; - if (strcmp(abbreviation, zone.abbreviation) == 0) { - if (zone.offsetHours < 0) - return (zone.offsetHours * 60 * 60) - (zone.offsetMinutes * 60); - return (zone.offsetHours * 60 * 60) + (zone.offsetMinutes * 60); - } - } - - return 0; -} - - -static NSInteger offsetInSecondsForOffsetCharacters(const char *timeZoneCharacters) { - - BOOL isPlus = timeZoneCharacters[0] == '+'; - NSUInteger finalIndex = 0; - NSInteger hours = nextNumericValue(timeZoneCharacters, strlen(timeZoneCharacters), 0, 2, &finalIndex); - NSInteger minutes = nextNumericValue(timeZoneCharacters, strlen(timeZoneCharacters), finalIndex + 1, 2, &finalIndex); - - if (hours == NSNotFound) - hours = 0; - if (minutes == NSNotFound) - minutes = 0; - if (hours == 0 && minutes == 0) - return 0; - - NSInteger seconds = (hours * 60 * 60) + (minutes * 60); - if (!isPlus) - seconds = 0 - seconds; - return seconds; -} - - -static const char *rs_GMT = "GMT"; -static const char *rs_UTC = "UTC"; - -static NSInteger parsedTimeZoneOffset(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex) { - - /*Examples: GMT Z +0000 -0000 +07:00 -0700 PDT EST - Parse into char[5] -- drop any colon characters. If numeric, calculate seconds from GMT. 
- If alpha, special-case GMT and Z, otherwise look up in time zone list to get offset.*/ - - char timeZoneCharacters[6] = {0, 0, 0, 0, 0, 0}; //nil-terminated last character - NSUInteger i = 0; - NSUInteger numberOfCharactersFound = 0; - - for (i = startingIndex; i < numberOfBytes; i++) { - char ch = bytes[i]; - if (ch == ':' || ch == ' ') - continue; - if (isdigit(ch) || isalpha(ch) || ch == '+' || ch == '-') { - numberOfCharactersFound++; - timeZoneCharacters[numberOfCharactersFound - 1] = ch; - } - if (numberOfCharactersFound >= 5) - break; - } - - if (numberOfCharactersFound < 1 || timeZoneCharacters[0] == 'Z' || timeZoneCharacters[0] == 'z') - return 0; - if (strcasestr(timeZoneCharacters, rs_GMT) != nil || strcasestr(timeZoneCharacters, rs_UTC)) - return 0; - - if (hasAtLeastOneAlphaCharacter(timeZoneCharacters)) - return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters); - return offsetInSecondsForOffsetCharacters(timeZoneCharacters); -} - - -#pragma mark - Date Creation - -static NSDate *dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(NSInteger year, NSInteger month, NSInteger day, NSInteger hour, NSInteger minute, NSInteger second, NSInteger milliseconds, NSInteger timeZoneOffset) { - - struct tm timeInfo; - timeInfo.tm_sec = (int)second; - timeInfo.tm_min = (int)minute; - timeInfo.tm_hour = (int)hour; - timeInfo.tm_mday = (int)day; - timeInfo.tm_mon = (int)(month - 1); //It's 1-based coming in - timeInfo.tm_year = (int)(year - 1900); //see time.h -- it's years since 1900 - timeInfo.tm_wday = -1; - timeInfo.tm_yday = -1; - timeInfo.tm_isdst = -1; - timeInfo.tm_gmtoff = 0;//[timeZone secondsFromGMT]; - timeInfo.tm_zone = nil; - - NSTimeInterval rawTime = (NSTimeInterval)(timegm(&timeInfo) - timeZoneOffset); //timegm instead of mktime (which uses local time zone) - if (rawTime == (time_t)ULONG_MAX) { - - /*NSCalendar is super-amazingly-slow (which is partly why RSDateParser exists), so this is used only when the date is far enough in the 
future (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. If profiling says that this is a performance issue, then you've got a weird app that needs to work with dates far in the future.*/ - - NSDateComponents *dateComponents = [NSDateComponents new]; - - dateComponents.timeZone = [NSTimeZone timeZoneForSecondsFromGMT:timeZoneOffset]; - dateComponents.year = year; - dateComponents.month = month; - dateComponents.day = day; - dateComponents.hour = hour; - dateComponents.minute = minute; - dateComponents.second = second + (milliseconds / 1000); - - return [[NSCalendar autoupdatingCurrentCalendar] dateFromComponents:dateComponents]; - } - - if (milliseconds > 0) { - rawTime += ((float)milliseconds / 1000.0f); - } - - return [NSDate dateWithTimeIntervalSince1970:rawTime]; -} - - -#pragma mark - Standard Formats - -static NSDate *RSParsePubDateWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - /*@"EEE',' dd MMM yyyy HH':'mm':'ss ZZZ" - @"EEE, dd MMM yyyy HH:mm:ss zzz" - @"dd MMM yyyy HH:mm zzz" - @"dd MMM yyyy HH:mm ZZZ" - @"EEE, dd MMM yyyy" - @"EEE, dd MMM yyyy HH:mm zzz" - etc.*/ - - NSUInteger finalIndex = 0; - NSInteger day = 1; - NSInteger month = RSJanuary; - NSInteger year = 1970; - NSInteger hour = 0; - NSInteger minute = 0; - NSInteger second = 0; - NSInteger timeZoneOffset = 0; - - day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex); - if (day < 1 || day == NSNotFound) - day = 1; - - month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex); - year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex); - hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - if (hour == NSNotFound) - hour = 0; - - minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - if (minute == NSNotFound) - minute = 0; - - NSUInteger currentIndex = finalIndex + 1; - - BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':'); - if 
(hasSeconds) - second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex); - - currentIndex = finalIndex + 1; - BOOL hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ' '); - if (hasTimeZone) - timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex); - - return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset); -} - - -static NSDate *RSParseW3CWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss" - @"yyyy-MM-dd'T'HH:mm:sszzz" - @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz" - etc.*/ - - NSUInteger finalIndex = 0; - NSInteger day = 1; - NSInteger month = RSJanuary; - NSInteger year = 1970; - NSInteger hour = 0; - NSInteger minute = 0; - NSInteger second = 0; - NSInteger milliseconds = 0; - NSInteger timeZoneOffset = 0; - - year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex); - month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - - NSUInteger currentIndex = finalIndex + 1; - BOOL hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == '.'); - if (hasMilliseconds) { - milliseconds = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex); - currentIndex = finalIndex + 1; - - // Igore more than 3 digits for fraction of a second - while (currentIndex < numberOfBytes && isdigit(bytes[currentIndex])) currentIndex++; - } - - timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex); - - return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, milliseconds, 
timeZoneOffset); -} - - -static BOOL dateIsPubDate(const char *bytes, NSUInteger numberOfBytes) { - - NSUInteger i = 0; - - for (i = 0; i < numberOfBytes; i++) { - if (bytes[i] == ' ' || bytes[i] == ',') - return YES; - } - - return NO; -} - - -static BOOL dateIsW3CDate(const char *bytes, NSUInteger numberOfBytes) { - - // Something like 2010-11-17T08:40:07-05:00 - // But might be missing T character in the middle. - // Looks for four digits in a row followed by a -. - - for (NSUInteger i = 0; i < numberOfBytes; i++) { - char ch = bytes[i]; - if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') { - continue; - } - if (numberOfBytes - i < 5) { - return NO; - } - return isdigit(ch) && isdigit(bytes[i + 1]) && isdigit(bytes[i + 2]) && isdigit(bytes[i + 3]) && bytes[i + 4] == '-'; - } - - return NO; -} - -static BOOL numberOfBytesIsOutsideReasonableRange(NSUInteger numberOfBytes) { - return numberOfBytes < 6 || numberOfBytes > 150; -} - - -#pragma mark - API - -NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - if (numberOfBytesIsOutsideReasonableRange(numberOfBytes)) - return nil; - - if (dateIsW3CDate(bytes, numberOfBytes)) { - return RSParseW3CWithBytes(bytes, numberOfBytes); - } - if (dateIsPubDate(bytes, numberOfBytes)) - return RSParsePubDateWithBytes(bytes, numberOfBytes); - - // Fallback, in case our detection fails. - return RSParseW3CWithBytes(bytes, numberOfBytes); -} - - -NSDate *RSDateWithString(NSString *dateString) { - - const char *utf8String = [dateString UTF8String]; - return RSDateWithBytes(utf8String, strlen(utf8String)); -} - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h deleted file mode 100755 index 67c7f9f6c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h +++ /dev/null @@ -1,35 +0,0 @@ -// -// RSHTMLLinkParser.h -// RSParser -// -// Created by Brent Simmons on 8/7/16. 
-// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -/*Returns all some_text as RSHTMLLink object array.*/ - -@class ParserData; -@class RSHTMLLink; - -@interface RSHTMLLinkParser : NSObject - -+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData; - -@end - - -@interface RSHTMLLink : NSObject - -// Any of these, even urlString, may be nil, because HTML can be bad. - -@property (nonatomic, nullable, readonly) NSString *urlString; //absolute -@property (nonatomic, nullable, readonly) NSString *text; -@property (nonatomic, nullable, readonly) NSString *title; //title attribute inside anchor tag - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m deleted file mode 100755 index 624e33569..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m +++ /dev/null @@ -1,154 +0,0 @@ -// -// RSHTMLLinkParser.m -// RSParser -// -// Created by Brent Simmons on 8/7/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - - -#import "RSHTMLLinkParser.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" -#import "ParserData.h" - -#import - - - -@interface RSHTMLLinkParser() - -@property (nonatomic, readonly) NSMutableArray *links; -@property (nonatomic, readonly) ParserData *parserData; -@property (nonatomic, readonly) NSMutableArray *dictionaries; -@property (nonatomic, readonly) NSURL *baseURL; - -@end - - -@interface RSHTMLLink() - -@property (nonatomic, readwrite) NSString *urlString; //absolute -@property (nonatomic, readwrite) NSString *text; -@property (nonatomic, readwrite) NSString *title; //title attribute inside anchor tag - -@end - - -@implementation RSHTMLLinkParser - - -#pragma mark - Class Methods - -+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData { - - RSHTMLLinkParser *parser = [[self alloc] initWithParserData:parserData]; - return parser.links; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - NSParameterAssert(parserData.data); - NSParameterAssert(parserData.url); - - self = [super init]; - if (!self) { - return nil; - } - - _links = [NSMutableArray new]; - _parserData = parserData; - _dictionaries = [NSMutableArray new]; - _baseURL = [NSURL URLWithString:parserData.url]; - - [self parse]; - - return self; -} - - -#pragma mark - Parse - -- (void)parse { - - RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self]; - [parser parseData:self.parserData.data]; - [parser finishParsing]; -} - - -- (RSHTMLLink *)currentLink { - - return self.links.lastObject; -} - - -static NSString *kHrefKey = @"href"; - -- (NSString *)urlStringFromDictionary:(NSDictionary *)d { - - NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (!href) { - return nil; - } - - NSURL *absoluteURL = [NSURL URLWithString:href relativeToURL:self.baseURL]; - return absoluteURL.absoluteString; -} - - -static NSString *kTitleKey = @"title"; - -- (NSString 
*)titleFromDictionary:(NSDictionary *)d { - - return [d rsparser_objectForCaseInsensitiveKey:kTitleKey]; -} - - -- (void)handleLinkAttributes:(NSDictionary *)d { - - RSHTMLLink *link = self.currentLink; - link.urlString = [self urlStringFromDictionary:d]; - link.title = [self titleFromDictionary:d]; -} - - -static const char *kAnchor = "a"; -static const NSInteger kAnchorLength = 2; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) { - return; - } - - RSHTMLLink *link = [RSHTMLLink new]; - [self.links addObject:link]; - - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSParserObjectIsEmpty(d)) { - [self handleLinkAttributes:d]; - } - - [SAXParser beginStoringCharacters]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName { - - if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) { - return; - } - - self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace; -} - -@end - -@implementation RSHTMLLink - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h deleted file mode 100755 index 0010740a1..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h +++ /dev/null @@ -1,98 +0,0 @@ -// -// RSHTMLMetadata.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; -@import CoreGraphics; - -@class RSHTMLMetadataFeedLink; -@class RSHTMLMetadataAppleTouchIcon; -@class RSHTMLMetadataFavicon; -@class RSHTMLOpenGraphProperties; -@class RSHTMLOpenGraphImage; -@class RSHTMLTag; -@class RSHTMLTwitterProperties; - -NS_ASSUME_NONNULL_BEGIN - -__attribute__((swift_attr("@Sendable"))) -@interface RSHTMLMetadata : NSObject - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, readonly) NSString *baseURLString; -@property (nonatomic, readonly) NSArray *tags; - -@property (nonatomic, readonly) NSArray *faviconLinks DEPRECATED_MSG_ATTRIBUTE("Use the favicons property instead."); -@property (nonatomic, readonly) NSArray *favicons; -@property (nonatomic, readonly) NSArray *appleTouchIcons; -@property (nonatomic, readonly) NSArray *feedLinks; - -@property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties; -@property (nonatomic, readonly) RSHTMLTwitterProperties *twitterProperties; - -@end - - -@interface RSHTMLMetadataAppleTouchIcon : NSObject - -@property (nonatomic, readonly) NSString *rel; -@property (nonatomic, nullable, readonly) NSString *sizes; -@property (nonatomic, readonly) CGSize size; -@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute. - -@end - - -@interface RSHTMLMetadataFeedLink : NSObject - -@property (nonatomic, nullable, readonly) NSString *title; -@property (nonatomic, nullable, readonly) NSString *type; -@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute. - -@end - -@interface RSHTMLMetadataFavicon : NSObject - -@property (nonatomic, nullable, readonly) NSString *type; -@property (nonatomic, nullable, readonly) NSString *urlString; - -@end - -@interface RSHTMLOpenGraphProperties : NSObject - -// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image. 
-// See http://ogp.me/ - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, readonly) NSArray *images; - -@end - -@interface RSHTMLOpenGraphImage : NSObject - -@property (nonatomic, nullable, readonly) NSString *url; -@property (nonatomic, nullable, readonly) NSString *secureURL; -@property (nonatomic, nullable, readonly) NSString *mimeType; -@property (nonatomic, readonly) CGFloat width; -@property (nonatomic, readonly) CGFloat height; -@property (nonatomic, nullable, readonly) NSString *altText; - -@end - -@interface RSHTMLTwitterProperties : NSObject - -// TODO: the rest. At this writing (Nov. 26, 2017) I just care about twitter:image:src. - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, nullable, readonly) NSString *imageURL; // twitter:image:src - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m deleted file mode 100755 index 2def0b078..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m +++ /dev/null @@ -1,483 +0,0 @@ -// -// RSHTMLMetadata.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSHTMLMetadata.h" -#import "RSParserInternal.h" -#import "RSHTMLTag.h" - - - -static NSString *urlStringFromDictionary(NSDictionary *d); -static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString); -static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString); -static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString); -static NSString *relValue(NSDictionary *d); -static BOOL typeIsFeedType(NSString *type); - -static NSString *kIconRelValue = @"icon"; -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kAppleTouchIconValue = @"apple-touch-icon"; -static NSString *kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed"; -static NSString *kSizesKey = @"sizes"; -static NSString *kTitleKey = @"title"; -static NSString *kRelKey = @"rel"; -static NSString *kAlternateKey = @"alternate"; -static NSString *kRSSSuffix = @"/rss+xml"; -static NSString *kAtomSuffix = @"/atom+xml"; -static NSString *kJSONSuffix = @"/json"; -static NSString *kTypeKey = @"type"; - -@interface RSHTMLMetadataAppleTouchIcon () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - - -@interface RSHTMLMetadataFeedLink () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - -@interface RSHTMLMetadataFavicon () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - -@implementation RSHTMLMetadata - -#pragma mark - Init - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - _baseURLString = urlString; - _tags = tags; - - _favicons = [self resolvedFaviconLinks]; - - NSArray *appleTouchIconTags = [self appleTouchIconTags]; - _appleTouchIcons = objectsOfClassWithTags([RSHTMLMetadataAppleTouchIcon class], 
appleTouchIconTags, urlString); - - NSArray *feedLinkTags = [self feedLinkTags]; - _feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], feedLinkTags, urlString); - - _openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags]; - _twitterProperties = [[RSHTMLTwitterProperties alloc] initWithURLString:urlString tags:tags]; - - return self; -} - -#pragma mark - Private - -- (NSArray *)linkTagsWithMatchingRel:(NSString *)valueToMatch { - - // Case-insensitive; matches a whitespace-delimited word - - NSMutableArray *tags = [NSMutableArray array]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink || RSParserStringIsEmpty(urlStringFromDictionary(tag.attributes))) { - continue; - } - NSString *oneRelValue = relValue(tag.attributes); - if (oneRelValue) { - NSArray *relValues = [oneRelValue componentsSeparatedByCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet]; - - for (NSString *relValue in relValues) { - if ([relValue compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) { - [tags addObject:tag]; - break; - } - } - } - } - - return tags; -} - - -- (NSArray *)appleTouchIconTags { - - NSMutableArray *tags = [NSMutableArray new]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink) { - continue; - } - NSString *oneRelValue = relValue(tag.attributes).lowercaseString; - if ([oneRelValue isEqualToString:kAppleTouchIconValue] || [oneRelValue isEqualToString:kAppleTouchIconPrecomposedValue]) { - [tags addObject:tag]; - } - } - - return tags; -} - - -- (NSArray *)feedLinkTags { - - NSMutableArray *tags = [NSMutableArray new]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink) { - continue; - } - - NSDictionary *oneDictionary = tag.attributes; - NSString *oneRelValue = relValue(oneDictionary).lowercaseString; - if (![oneRelValue isEqualToString:kAlternateKey]) { - continue; - } - - NSString *oneType = [oneDictionary 
rsparser_objectForCaseInsensitiveKey:kTypeKey]; - if (!typeIsFeedType(oneType)) { - continue; - } - - if (RSParserStringIsEmpty(urlStringFromDictionary(oneDictionary))) { - continue; - } - - [tags addObject:tag]; - } - - return tags; -} - -- (NSArray *)faviconLinks { - NSMutableArray *urls = [NSMutableArray array]; - - for (RSHTMLMetadataFavicon *favicon in self.favicons) { - [urls addObject:favicon.urlString]; - } - - return urls; -} - -- (NSArray *)resolvedFaviconLinks { - NSArray *tags = [self linkTagsWithMatchingRel:kIconRelValue]; - NSMutableArray *links = [NSMutableArray array]; - NSMutableSet *seenHrefs = [NSMutableSet setWithCapacity:tags.count]; - - for (RSHTMLTag *tag in tags) { - RSHTMLMetadataFavicon *link = [[RSHTMLMetadataFavicon alloc] initWithTag:tag baseURLString:self.baseURLString]; - NSString *urlString = link.urlString; - if (urlString == nil) { - continue; - } - if (![seenHrefs containsObject:urlString]) { - [links addObject:link]; - [seenHrefs addObject:urlString]; - } - } - - return links; -} - -@end - - -static NSString *relValue(NSDictionary *d) { - - return [d rsparser_objectForCaseInsensitiveKey:kRelKey]; -} - - -static NSString *urlStringFromDictionary(NSDictionary *d) { - - NSString *urlString = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (urlString) { - return urlString; - } - - return [d rsparser_objectForCaseInsensitiveKey:kSrcKey]; -} - - -static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) { - - NSURL *url = [NSURL URLWithString:baseURLString]; - if (!url) { - return nil; - } - - NSURL *absoluteURL = [NSURL URLWithString:relativeURLString relativeToURL:url]; - return absoluteURL.absoluteURL.standardizedURL.absoluteString; -} - - -static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) { - - NSString *urlString = urlStringFromDictionary(d); - if (RSParserStringIsEmpty(urlString)) { - return nil; - } - return 
absoluteURLStringWithRelativeURLString(urlString, baseURLString); -} - - -static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString) { - - NSMutableArray *objects = [NSMutableArray new]; - - for (RSHTMLTag *tag in tags) { - - id oneObject = [[class alloc] initWithTag:tag baseURLString:baseURLString]; - if (oneObject) { - [objects addObject:oneObject]; - } - } - - return objects; -} - - -static BOOL typeIsFeedType(NSString *type) { - - type = type.lowercaseString; - return [type hasSuffix:kRSSSuffix] || [type hasSuffix:kAtomSuffix] || [type hasSuffix:kJSONSuffix]; -} - - -@implementation RSHTMLMetadataAppleTouchIcon - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _sizes = [d rsparser_objectForCaseInsensitiveKey:kSizesKey]; - _rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey]; - - _size = CGSizeZero; - if (_sizes) { - NSArray *components = [_sizes componentsSeparatedByString:@"x"]; - if (components.count == 2) { - CGFloat width = [components[0] floatValue]; - CGFloat height = [components[1] floatValue]; - _size = CGSizeMake(width, height); - } - } - - return self; -} - -@end - - -@implementation RSHTMLMetadataFeedLink - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _title = [d rsparser_objectForCaseInsensitiveKey:kTitleKey]; - _type = [d rsparser_objectForCaseInsensitiveKey:kTypeKey]; - - return self; -} - -@end - -@implementation RSHTMLMetadataFavicon - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = 
tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _type = [d rsparser_objectForCaseInsensitiveKey:kTypeKey]; - - return self; -} - -@end - -@interface RSHTMLOpenGraphImage () - -@property (nonatomic, readwrite) NSString *url; -@property (nonatomic, readwrite) NSString *secureURL; -@property (nonatomic, readwrite) NSString *mimeType; -@property (nonatomic, readwrite) CGFloat width; -@property (nonatomic, readwrite) CGFloat height; -@property (nonatomic, readwrite) NSString *altText; - -@end - -@implementation RSHTMLOpenGraphImage - - -@end - -@interface RSHTMLOpenGraphProperties () - -@property (nonatomic) NSMutableArray *ogImages; -@end - -@implementation RSHTMLOpenGraphProperties - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - _ogImages = [NSMutableArray new]; - - [self parseTags:tags]; - return self; -} - - -- (RSHTMLOpenGraphImage *)currentImage { - - return self.ogImages.lastObject; -} - - -- (RSHTMLOpenGraphImage *)pushImage { - - RSHTMLOpenGraphImage *image = [RSHTMLOpenGraphImage new]; - [self.ogImages addObject:image]; - return image; -} - -- (RSHTMLOpenGraphImage *)ensureImage { - - RSHTMLOpenGraphImage *image = [self currentImage]; - if (image != nil) { - return image; - } - return [self pushImage]; -} - - -- (NSArray *)images { - - return self.ogImages; -} - -static NSString *ogPrefix = @"og:"; -static NSString *ogImage = @"og:image"; -static NSString *ogImageURL = @"og:image:url"; -static NSString *ogImageSecureURL = @"og:image:secure_url"; -static NSString *ogImageType = @"og:image:type"; -static NSString *ogImageWidth = @"og:image:width"; -static NSString *ogImageHeight = @"og:image:height"; -static NSString *ogImageAlt = @"og:image:alt"; -static NSString *ogPropertyKey = @"property"; -static NSString *ogContentKey = @"content"; - -- (void)parseTags:(NSArray *)tags { - - for (RSHTMLTag *tag in tags) { - - if (tag.type != 
RSHTMLTagTypeMeta) { - continue; - } - - NSString *propertyName = tag.attributes[ogPropertyKey]; - if (!propertyName || ![propertyName hasPrefix:ogPrefix]) { - continue; - } - NSString *content = tag.attributes[ogContentKey]; - if (!content) { - continue; - } - - if ([propertyName isEqualToString:ogImage]) { - RSHTMLOpenGraphImage *image = [self currentImage]; - if (!image || image.url) { // Most likely case, since og:image will probably appear before other image attributes. - image = [self pushImage]; - } - image.url = content; - } - - else if ([propertyName isEqualToString:ogImageURL]) { - [self ensureImage].url = content; - } - else if ([propertyName isEqualToString:ogImageSecureURL]) { - [self ensureImage].secureURL = content; - } - else if ([propertyName isEqualToString:ogImageType]) { - [self ensureImage].mimeType = content; - } - else if ([propertyName isEqualToString:ogImageAlt]) { - [self ensureImage].altText = content; - } - else if ([propertyName isEqualToString:ogImageWidth]) { - [self ensureImage].width = [content floatValue]; - } - else if ([propertyName isEqualToString:ogImageHeight]) { - [self ensureImage].height = [content floatValue]; - } - } -} - -@end - -@implementation RSHTMLTwitterProperties - -static NSString *twitterNameKey = @"name"; -static NSString *twitterContentKey = @"content"; -static NSString *twitterImageSrc = @"twitter:image:src"; - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - for (RSHTMLTag *tag in tags) { - - if (tag.type != RSHTMLTagTypeMeta) { - continue; - } - NSString *name = tag.attributes[twitterNameKey]; - if (!name || ![name isEqualToString:twitterImageSrc]) { - continue; - } - NSString *content = tag.attributes[twitterContentKey]; - if (!content || content.length < 1) { - continue; - } - _imageURL = content; - break; - } - - return self; -} - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h 
b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h deleted file mode 100755 index f9361905c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSHTMLMetadataParser.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@class RSHTMLMetadata; -@class ParserData; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSHTMLMetadataParser : NSObject - -+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData; - - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m deleted file mode 100755 index 254fd109d..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m +++ /dev/null @@ -1,151 +0,0 @@ -// -// RSHTMLMetadataParser.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSHTMLMetadataParser.h" -#import "RSHTMLMetadata.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" -#import "ParserData.h" -#import "RSHTMLTag.h" - -#import - - -@interface RSHTMLMetadataParser () - -@property (nonatomic, readonly) ParserData *parserData; -@property (nonatomic, readwrite) RSHTMLMetadata *metadata; -@property (nonatomic) NSMutableArray *tags; -@property (nonatomic) BOOL didFinishParsing; -@property (nonatomic) BOOL shouldScanPastHeadSection; - -@end - - -@implementation RSHTMLMetadataParser - - -#pragma mark - Class Methods - -+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData { - - RSHTMLMetadataParser *parser = [[self alloc] initWithParserData:parserData]; - return parser.metadata; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - NSParameterAssert(parserData.data); - NSParameterAssert(parserData.url); - - self = [super init]; - if (!self) { - return nil; - } - - _parserData = parserData; - _tags = [NSMutableArray new]; - - // YouTube has a weird bug where, on some pages, it puts the feed link tag after the head section, in the body section. - // This allows for a special case where we continue to scan after the head section. - // (Yes, this match could yield false positives, but it’s harmless.) 
- _shouldScanPastHeadSection = [parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch].location != NSNotFound; - - [self parse]; - - return self; -} - - -#pragma mark - Parse - -- (void)parse { - - RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self]; - [parser parseData:self.parserData.data]; - [parser finishParsing]; - - self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.parserData.url tags:self.tags]; -} - - -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kRelKey = @"rel"; - -- (NSString *)linkForDictionary:(NSDictionary *)d { - - NSString *link = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (link) { - return link; - } - - return [d rsparser_objectForCaseInsensitiveKey:kSrcKey]; -} - -- (void)handleLinkAttributes:(NSDictionary *)d { - - if (RSParserStringIsEmpty([d rsparser_objectForCaseInsensitiveKey:kRelKey])) { - return; - } - if (RSParserStringIsEmpty([self linkForDictionary:d])) { - return; - } - - RSHTMLTag *tag = [RSHTMLTag linkTagWithAttributes:d]; - [self.tags addObject:tag]; -} - -- (void)handleMetaAttributes:(NSDictionary *)d { - - RSHTMLTag *tag = [RSHTMLTag metaTagWithAttributes:d]; - [self.tags addObject:tag]; -} - -#pragma mark - RSSAXHTMLParserDelegate - -static const char *kBody = "body"; -static const NSInteger kBodyLength = 5; -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; -static const char *kMeta = "meta"; -static const NSInteger kMetaLength = 5; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - if (self.didFinishParsing) { - return; - } - - if (RSSAXEqualTags(localName, kBody, kBodyLength) && !self.shouldScanPastHeadSection) { - self.didFinishParsing = YES; - return; - } - - if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if 
(!RSParserObjectIsEmpty(d)) { - [self handleLinkAttributes:d]; - } - return; - } - - if (RSSAXEqualTags(localName, kMeta, kMetaLength)) { - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSParserObjectIsEmpty(d)) { - [self handleMetaAttributes:d]; - } - } -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.h deleted file mode 100644 index e8e9cb426..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.h +++ /dev/null @@ -1,33 +0,0 @@ -// -// RSHTMLTag.h -// RSParser -// -// Created by Brent Simmons on 11/26/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -extern NSString *RSHTMLTagNameLink; // @"link" -extern NSString *RSHTMLTagNameMeta; // @"meta" - -typedef NS_ENUM(NSInteger, RSHTMLTagType) { - RSHTMLTagTypeLink, - RSHTMLTagTypeMeta -}; - -@interface RSHTMLTag : NSObject - -- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes; - -+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes; -+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes; - -@property (nonatomic, readonly) RSHTMLTagType type; -@property (nonatomic, readonly) NSDictionary *attributes; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.m deleted file mode 100644 index 5b0262ffb..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.m +++ /dev/null @@ -1,43 +0,0 @@ -// -// RSHTMLTag.m -// RSParser -// -// Created by Brent Simmons on 11/26/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSHTMLTag.h" - -NSString *RSHTMLTagNameLink = @"link"; -NSString *RSHTMLTagNameMeta = @"meta"; - -@implementation RSHTMLTag - -- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes { - - self = [super init]; - if (!self) { - return nil; - } - - _type = type; - _attributes = attributes; - - return self; -} - -+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes { - - return [[self alloc] initWithType:RSHTMLTagTypeLink attributes:attributes]; -} - -+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes { - - return [[self alloc] initWithType:RSHTMLTagTypeMeta attributes:attributes]; -} - -- (NSString *)description { - return [NSString stringWithFormat:@"<%@: %p> type: %ld attributes: %@", NSStringFromClass([self class]), self, (long)self.type, self.attributes]; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.h deleted file mode 100755 index 688132fe5..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.h +++ /dev/null @@ -1,36 +0,0 @@ -// -// RSOPMLAttributes.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -// OPML allows for arbitrary attributes. -// These are the common attributes in OPML files used as RSS subscription lists. - -extern NSString *OPMLTextKey; //text -extern NSString *OPMLTitleKey; //title -extern NSString *OPMLDescriptionKey; //description -extern NSString *OPMLTypeKey; //type -extern NSString *OPMLVersionKey; //version -extern NSString *OPMLHMTLURLKey; //htmlUrl -extern NSString *OPMLXMLURLKey; //xmlUrl - - -@interface NSDictionary (RSOPMLAttributes) - -// A frequent error in OPML files is to mess up the capitalization, -// so these do a case-insensitive lookup. 
- -@property (nonatomic, readonly) NSString *opml_text; -@property (nonatomic, readonly) NSString *opml_title; -@property (nonatomic, readonly) NSString *opml_description; -@property (nonatomic, readonly) NSString *opml_type; -@property (nonatomic, readonly) NSString *opml_version; -@property (nonatomic, readonly) NSString *opml_htmlUrl; -@property (nonatomic, readonly) NSString *opml_xmlUrl; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.m deleted file mode 100755 index db6508b24..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.m +++ /dev/null @@ -1,68 +0,0 @@ -// -// RSOPMLAttributes.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSOPMLAttributes.h" -#import "RSParserInternal.h" - - - - -NSString *OPMLTextKey = @"text"; -NSString *OPMLTitleKey = @"title"; -NSString *OPMLDescriptionKey = @"description"; -NSString *OPMLTypeKey = @"type"; -NSString *OPMLVersionKey = @"version"; -NSString *OPMLHMTLURLKey = @"htmlUrl"; -NSString *OPMLXMLURLKey = @"xmlUrl"; - - -@implementation NSDictionary (RSOPMLAttributes) - -- (NSString *)opml_text { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLTextKey]; -} - - -- (NSString *)opml_title { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLTitleKey]; -} - - -- (NSString *)opml_description { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLDescriptionKey]; -} - - -- (NSString *)opml_type { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLTypeKey]; -} - - -- (NSString *)opml_version { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLVersionKey]; -} - - -- (NSString *)opml_htmlUrl { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLHMTLURLKey]; -} - - -- (NSString *)opml_xmlUrl { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLXMLURLKey]; -} - - -@end diff --git 
a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.h deleted file mode 100755 index 5061853fe..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.h +++ /dev/null @@ -1,21 +0,0 @@ -// -// RSOPMLDocument.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -#import "RSOPMLItem.h" - - - - -@interface RSOPMLDocument : RSOPMLItem - -@property (nonatomic) NSString *title; -@property (nonatomic) NSString *url; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.m deleted file mode 100755 index 1506bd911..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.m +++ /dev/null @@ -1,14 +0,0 @@ -// -// RSOPMLDocument.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSOPMLDocument.h" - -@implementation RSOPMLDocument - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.h deleted file mode 100755 index 276c62ed7..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// RSOPMLError.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -extern NSString *RSOPMLErrorDomain; - - -typedef NS_ENUM(NSInteger, RSOPMLErrorCode) { - RSOPMLErrorCodeDataIsWrongFormat = 1024 -}; - - -NSError *RSOPMLWrongFormatError(NSString *fileName); diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.m deleted file mode 100755 index 7aa3c5e9d..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.m +++ /dev/null @@ -1,22 +0,0 @@ -// -// RSOPMLError.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSOPMLError.h" - -NSString *RSOPMLErrorDomain = @"com.ranchero.OPML"; - -NSError *RSOPMLWrongFormatError(NSString *fileName) { - - NSString *localizedDescriptionFormatString = NSLocalizedString(@"The file ‘%@’ can’t be parsed because it’s not an OPML file.", @"OPML wrong format"); - NSString *localizedDescription = [NSString stringWithFormat:localizedDescriptionFormatString, fileName]; - - NSString *localizedFailureString = NSLocalizedString(@"The file is not an OPML file.", @"OPML wrong format"); - NSDictionary *userInfo = @{NSLocalizedDescriptionKey: localizedDescription, NSLocalizedFailureReasonErrorKey: localizedFailureString}; - - return [[NSError alloc] initWithDomain:RSOPMLErrorDomain code:RSOPMLErrorCodeDataIsWrongFormat userInfo:userInfo]; -} diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.h deleted file mode 100755 index 8c4aea6b0..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSOPMLFeedSpecifier.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSOPMLFeedSpecifier : NSObject - -- (instancetype)initWithTitle:(NSString * _Nullable)title feedDescription:(NSString * _Nullable)feedDescription homePageURL:(NSString * _Nullable)homePageURL feedURL:(NSString *)feedURL; - -@property (nonatomic, nullable, readonly) NSString *title; -@property (nonatomic, nullable, readonly) NSString *feedDescription; -@property (nonatomic, nullable, readonly) NSString *homePageURL; -@property (nonatomic, readonly) NSString *feedURL; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.m deleted file mode 100755 index bb32ccf54..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.m +++ /dev/null @@ -1,51 +0,0 @@ -// -// RSOPMLFeedSpecifier.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSOPMLFeedSpecifier.h" -#import "RSParserInternal.h" - - - -@implementation RSOPMLFeedSpecifier - -- (instancetype)initWithTitle:(NSString *)title feedDescription:(NSString *)feedDescription homePageURL:(NSString *)homePageURL feedURL:(NSString *)feedURL { - - NSParameterAssert(!RSParserStringIsEmpty(feedURL)); - - self = [super init]; - if (!self) { - return nil; - } - - if (RSParserStringIsEmpty(title)) { - _title = nil; - } - else { - _title = title; - } - - if (RSParserStringIsEmpty(feedDescription)) { - _feedDescription = nil; - } - else { - _feedDescription = feedDescription; - } - - if (RSParserStringIsEmpty(homePageURL)) { - _homePageURL = nil; - } - else { - _homePageURL = homePageURL; - } - - _feedURL = feedURL; - - return self; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.h deleted file mode 100755 index 15afa48f2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.h +++ /dev/null @@ -1,30 +0,0 @@ -// -// RSOPMLItem.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -@class RSOPMLFeedSpecifier; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSOPMLItem : NSObject - -@property (nonatomic, nullable) NSDictionary *attributes; -@property (nonatomic, nullable) NSArray *children; - -- (void)addChild:(RSOPMLItem *)child; - -@property (nonatomic, nullable, readonly) RSOPMLFeedSpecifier *feedSpecifier; - -@property (nonatomic, nullable, readonly) NSString *titleFromAttributes; -@property (nonatomic, readonly) BOOL isFolder; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.m deleted file mode 100755 index a273cd317..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.m +++ /dev/null @@ -1,87 +0,0 @@ -// -// RSOPMLItem.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSOPMLItem.h" -#import "RSOPMLAttributes.h" -#import "RSOPMLFeedSpecifier.h" -#import "RSParserInternal.h" - - - -@interface RSOPMLItem () - -@property (nonatomic) NSMutableArray *mutableChildren; - -@end - - -@implementation RSOPMLItem - -@synthesize children = _children; -@synthesize feedSpecifier = _feedSpecifier; - - -- (NSArray *)children { - - return [self.mutableChildren copy]; -} - - -- (void)setChildren:(NSArray *)children { - - _children = children; - self.mutableChildren = [_children mutableCopy]; -} - - -- (void)addChild:(RSOPMLItem *)child { - - if (!self.mutableChildren) { - self.mutableChildren = [NSMutableArray new]; - } - - [self.mutableChildren addObject:child]; -} - - -- (RSOPMLFeedSpecifier *)feedSpecifier { - - if (_feedSpecifier) { - return _feedSpecifier; - } - - NSString *feedURL = self.attributes.opml_xmlUrl; - if (RSParserObjectIsEmpty(feedURL)) { - return nil; - } - - _feedSpecifier = [[RSOPMLFeedSpecifier alloc] initWithTitle:self.titleFromAttributes feedDescription:self.attributes.opml_description 
homePageURL:self.attributes.opml_htmlUrl feedURL:feedURL]; - - return _feedSpecifier; -} - -- (NSString *)titleFromAttributes { - - NSString *title = self.attributes.opml_title; - if (title) { - return title; - } - title = self.attributes.opml_text; - if (title) { - return title; - } - - return nil; -} - -- (BOOL)isFolder { - - return self.mutableChildren.count > 0; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.h deleted file mode 100755 index 8db594b03..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.h +++ /dev/null @@ -1,26 +0,0 @@ -// -// RSOPMLParser.h -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@class ParserData; -@class RSOPMLDocument; - -typedef void (^OPMLParserCallback)(RSOPMLDocument *opmlDocument, NSError *error); - -// Parses on background thread; calls back on main thread. -void RSParseOPML(ParserData *parserData, OPMLParserCallback callback); - - -@interface RSOPMLParser: NSObject - -+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error; - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.m deleted file mode 100755 index 93f2c420c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.m +++ /dev/null @@ -1,310 +0,0 @@ -// -// RSOPMLParser.m -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSOPMLParser.h" -#import "RSSAXParser.h" -#import "RSOPMLItem.h" -#import "RSOPMLDocument.h" -#import "RSOPMLAttributes.h" -#import "RSOPMLError.h" -#import "RSOPMLParser.h" -#import "ParserData.h" - -#import - - - -@interface RSOPMLParser () - -@property (nonatomic, readwrite) RSOPMLDocument *OPMLDocument; -@property (nonatomic, readwrite) NSError *error; -@property (nonatomic) NSMutableArray *itemStack; - -@end - -void RSParseOPML(ParserData *parserData, OPMLParserCallback callback) { - - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - - @autoreleasepool { - NSError *error = nil; - RSOPMLDocument *opmlDocument = [RSOPMLParser parseOPMLWithParserData:parserData error:&error]; - - dispatch_async(dispatch_get_main_queue(), ^{ - callback(opmlDocument, error); - }); - } - }); -} - -@implementation RSOPMLParser - -#pragma mark - Class Methods - -+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error { - - RSOPMLParser *parser = [[RSOPMLParser alloc] initWithParserData:parserData]; - - RSOPMLDocument *document = parser.OPMLDocument; - document.url = parserData.url; - if (parser.error && error) { - *error = parser.error; - return nil; - } - return document; -} - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - [self parse:parserData]; - - return self; -} - - -#pragma mark - Private - -- (void)parse:(ParserData *)parserData { - - @autoreleasepool { - - if (![self canParseData:parserData.data]) { - - NSString *filename = nil; - NSURL *url = [NSURL URLWithString:parserData.url]; - if (url && url.isFileURL) { - filename = url.path.lastPathComponent; - } - if ([parserData.url hasPrefix:@"http"]) { - filename = parserData.url; - } - if (!filename) { - filename = parserData.url; - } - self.error = RSOPMLWrongFormatError(filename); - return; - } - - RSSAXParser *parser = [[RSSAXParser alloc] 
initWithDelegate:self]; - - self.itemStack = [NSMutableArray new]; - self.OPMLDocument = [RSOPMLDocument new]; - [self pushItem:self.OPMLDocument]; - - [parser parseData:parserData.data]; - [parser finishParsing]; - } -} - -- (BOOL)canParseData:(NSData *)d { - - // Check for 0, nil); - - /*If itemStack is empty, bad things are happening. - But we still shouldn't crash in production.*/ - - if (self.itemStack.count > 0) { - [self.itemStack removeLastObject]; - } -} - - -- (RSOPMLItem *)currentItem { - - return self.itemStack.lastObject; -} - - -#pragma mark - RSSAXParserDelegate - -static const char *kOutline = "outline"; -static const char kOutlineLength = 8; - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - [SAXParser beginStoringCharacters]; - return; - } - - if (!RSSAXEqualTags(localName, kOutline, kOutlineLength)) { - return; - } - - RSOPMLItem *item = [RSOPMLItem new]; - item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - - [[self currentItem] addChild:item]; - [self pushItem:item]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - RSOPMLItem* item = [self currentItem]; - if ([item isKindOfClass:[RSOPMLDocument class]]) { - ((RSOPMLDocument *)item).title = SAXParser.currentStringWithTrimmedWhitespace; - } - return; - } - - if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) { - [self popItem]; - } -} - - -static const char *kText = "text"; -static const NSInteger kTextLength = 5; - 
-static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kDescription = "description"; -static const NSInteger kDescriptionLength = 12; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kVersion = "version"; -static const NSInteger kVersionLength = 8; - -static const char *kHTMLURL = "htmlUrl"; -static const NSInteger kHTMLURLLength = 8; - -static const char *kXMLURL = "xmlUrl"; -static const NSInteger kXMLURLLength = 7; - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (prefix) { - return nil; - } - - size_t nameLength = strlen((const char *)name); - - if (nameLength == kTextLength - 1) { - if (RSSAXEqualTags(name, kText, kTextLength)) { - return OPMLTextKey; - } - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return OPMLTypeKey; - } - } - - else if (nameLength == kTitleLength - 1) { - if (RSSAXEqualTags(name, kTitle, kTitleLength)) { - return OPMLTitleKey; - } - } - - else if (nameLength == kXMLURLLength - 1) { - if (RSSAXEqualTags(name, kXMLURL, kXMLURLLength)) { - return OPMLXMLURLKey; - } - } - - else if (nameLength == kVersionLength - 1) { - if (RSSAXEqualTags(name, kVersion, kVersionLength)) { - return OPMLVersionKey; - } - if (RSSAXEqualTags(name, kHTMLURL, kHTMLURLLength)) { - return OPMLHMTLURLKey; - } - } - - else if (nameLength == kDescriptionLength - 1) { - if (RSSAXEqualTags(name, kDescription, kDescriptionLength)) { - return OPMLDescriptionKey; - } - } - - return nil; -} - - -static const char *kRSSUppercase = "RSS"; -static const char *kRSSLowercase = "rss"; -static const NSUInteger kRSSLength = 3; -static NSString *RSSUppercaseValue = @"RSS"; -static NSString *RSSLowercaseValue = @"rss"; -static NSString *emptyString = @""; - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - -- 
(NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - - - if (length < 1) { - return emptyString; - } - - if (length == kRSSLength) { - - if (equalBytes(bytes, kRSSUppercase, kRSSLength)) { - return RSSUppercaseValue; - } - else if (equalBytes(bytes, kRSSLowercase, kRSSLength)) { - return RSSLowercaseValue; - } - - } - - return nil; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h deleted file mode 100755 index a2bfb3175..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h +++ /dev/null @@ -1,37 +0,0 @@ -// -// RSParsedArticle.h -// RSParser -// -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class RSParsedEnclosure; -@class RSParsedAuthor; - -@interface RSParsedArticle : NSObject - -- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL; - -@property (nonatomic, readonly, nonnull) NSString *feedURL; -@property (nonatomic, nonnull) NSString *articleID; //guid, if present, or calculated from other attributes. Should be unique to the feed, but not necessarily unique across different feeds. (Not suitable for a database ID.) 
- -@property (nonatomic, nullable) NSString *guid; -@property (nonatomic, nullable) NSString *title; -@property (nonatomic, nullable) NSString *body; -@property (nonatomic, nullable) NSString *link; -@property (nonatomic, nullable) NSString *permalink; -@property (nonatomic, nullable) NSSet *authors; -@property (nonatomic, nullable) NSSet *enclosures; -@property (nonatomic, nullable) NSDate *datePublished; -@property (nonatomic, nullable) NSDate *dateModified; -@property (nonatomic, nonnull) NSDate *dateParsed; -@property (nonatomic, nullable) NSString *language; - -- (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure; -- (void)addAuthor:(RSParsedAuthor *_Nonnull)author; - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m deleted file mode 100755 index b94930a79..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m +++ /dev/null @@ -1,134 +0,0 @@ -// -// RSParsedArticle.m -// RSParser -// -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. 
-// - - -#import "RSParsedArticle.h" -#import "RSParserInternal.h" -#import "NSString+RSParser.h" -#import "RSParsedAuthor.h" -#import "RSParsedEnclosure.h" - - - -@implementation RSParsedArticle - - -#pragma mark - Init - -- (instancetype)initWithFeedURL:(NSString *)feedURL { - - NSParameterAssert(feedURL != nil); - - self = [super init]; - if (!self) { - return nil; - } - - _feedURL = feedURL; - _dateParsed = [NSDate date]; - - return self; -} - - -#pragma mark - Enclosures - -- (void)addEnclosure:(RSParsedEnclosure *)enclosure { - - if (self.enclosures) { - self.enclosures = [self.enclosures setByAddingObject:enclosure]; - } - else { - self.enclosures = [NSSet setWithObject:enclosure]; - } -} - -#pragma mark - Authors - -- (void)addAuthor:(RSParsedAuthor *)author { - - if (self.authors) { - self.authors = [self.authors setByAddingObject:author]; - } - else { - self.authors = [NSSet setWithObject:author]; - } -} - -#pragma mark - articleID - -- (NSString *)articleID { - - if (self.guid) { - return self.guid; - } - - if (!_articleID) { - _articleID = [self calculatedArticleID]; - } - - return _articleID; -} - - -- (NSString *)calculatedArticleID { - - /*Concatenate a combination of properties when no guid. Then hash the result. - In general, feeds should have guids. When they don't, re-runs are very likely, - because there's no other 100% reliable way to determine identity. - This is intended to create an ID unique inside a feed, but not globally unique. - Not suitable for a database ID, in other words.*/ - - NSMutableString *s = [NSMutableString stringWithString:@""]; - - NSString *datePublishedTimeStampString = nil; - if (self.datePublished) { - datePublishedTimeStampString = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970]; - } - - // Ideally we have a permalink and a pubDate. Either one would probably be a good guid, but together they should be rock-solid. (In theory. Feeds are buggy, though.) 
- if (!RSParserStringIsEmpty(self.permalink) && datePublishedTimeStampString) { - [s appendString:self.permalink]; - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.link) && datePublishedTimeStampString) { - [s appendString:self.link]; - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.title) && datePublishedTimeStampString) { - [s appendString:self.title]; - [s appendString:datePublishedTimeStampString]; - } - - else if (datePublishedTimeStampString) { - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.permalink)) { - [s appendString:self.permalink]; - } - - else if (!RSParserStringIsEmpty(self.link)) { - [s appendString:self.link]; - } - - else if (!RSParserStringIsEmpty(self.title)) { - [s appendString:self.title]; - } - - else if (!RSParserStringIsEmpty(self.body)) { - [s appendString:self.body]; - } - - return [s rsparser_md5Hash]; -} - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.h deleted file mode 100644 index 2c28236a2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// RSParsedAuthor.h -// RSParserTests -// -// Created by Brent Simmons on 12/19/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -@interface RSParsedAuthor : NSObject - -@property (nonatomic, nullable) NSString *name; -@property (nonatomic, nullable) NSString *emailAddress; -@property (nonatomic, nullable) NSString *url; - -+ (instancetype _Nonnull )authorWithSingleString:(NSString *_Nonnull)s; // Don’t know which property it is. Guess based on contents of the string. Common with RSS. 
- -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.m deleted file mode 100644 index 154b546c8..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.m +++ /dev/null @@ -1,34 +0,0 @@ -// -// RSParsedAuthor.m -// RSParserTests -// -// Created by Brent Simmons on 12/19/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -#import "NSString+RSParser.h" - -#import "RSParsedAuthor.h" - -@implementation RSParsedAuthor - -+ (instancetype)authorWithSingleString:(NSString *)s { - - // The author element in RSS is supposed to be email address — but often it’s a name, and sometimes a URL. - - RSParsedAuthor *author = [[self alloc] init]; - - if ([s rsparser_contains:@"@"]) { - author.emailAddress = s; - } - else if ([s.lowercaseString hasPrefix:@"http"]) { - author.url = s; - } - else { - author.name = s; - } - - return author; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.h deleted file mode 100644 index 8fc9e404d..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.h +++ /dev/null @@ -1,22 +0,0 @@ -// -// RSParsedEnclosure.h -// RSParser -// -// Created by Brent Simmons on 12/18/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSParsedEnclosure : NSObject - -@property (nonatomic) NSString *url; -@property (nonatomic) NSInteger length; -@property (nonatomic, nullable) NSString *mimeType; -@property (nonatomic, nullable) NSString *title; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.m deleted file mode 100644 index f6f35da59..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.m +++ /dev/null @@ -1,13 +0,0 @@ -// -// RSParsedEnclosure.m -// RSParser -// -// Created by Brent Simmons on 12/18/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -#import "RSParsedEnclosure.h" - -@implementation RSParsedEnclosure - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h deleted file mode 100755 index 80be90fed..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h +++ /dev/null @@ -1,23 +0,0 @@ -// -// RSParsedFeed.h -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -@class RSParsedArticle; - -@interface RSParsedFeed : NSObject - -- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link language:(NSString * _Nullable)language articles:(NSArray * _Nonnull)articles; - -@property (nonatomic, readonly, nonnull) NSString *urlString; -@property (nonatomic, readonly, nullable) NSString *title; -@property (nonatomic, readonly, nullable) NSString *link; -@property (nonatomic, readonly, nullable) NSString *language; -@property (nonatomic, readonly, nonnull) NSSet *articles; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m deleted file mode 100755 index ef0c42e76..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m +++ /dev/null @@ -1,32 +0,0 @@ -// -// RSParsedFeed.m -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -#import "RSParsedFeed.h" - - - -@implementation RSParsedFeed - -- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link language:(NSString *)language articles:(NSSet *)articles { - - self = [super init]; - if (!self) { - return nil; - } - - _urlString = urlString; - _title = title; - _link = link; - _language = language; - _articles = articles; - - return self; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h b/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h deleted file mode 100755 index 76209e076..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSParserInternal.h -// RSParser -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -BOOL RSParserObjectIsEmpty(id _Nullable obj); -BOOL RSParserStringIsEmpty(NSString * _Nullable s); - - -@interface NSDictionary (RSParserInternal) - -- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m b/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m deleted file mode 100755 index 4ba6f8a97..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m +++ /dev/null @@ -1,61 +0,0 @@ -// -// RSParserInternal.m -// RSParser -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSParserInternal.h" -#import - - -static BOOL RSParserIsNil(id obj) { - - return obj == nil || obj == [NSNull null]; -} - -BOOL RSParserObjectIsEmpty(id obj) { - - if (RSParserIsNil(obj)) { - return YES; - } - - if ([obj respondsToSelector:@selector(count)]) { - return [obj count] < 1; - } - - if ([obj respondsToSelector:@selector(length)]) { - return [obj length] < 1; - } - - return NO; /*Shouldn't get here very often.*/ -} - -BOOL RSParserStringIsEmpty(NSString *s) { - - return RSParserIsNil(s) || s.length < 1; -} - - -@implementation NSDictionary (RSParserInternal) - -- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key { - - id obj = self[key]; - if (obj) { - return obj; - } - - for (NSString *oneKey in self.allKeys) { - - if ([oneKey isKindOfClass:[NSString class]] && [key caseInsensitiveCompare:oneKey] == NSOrderedSame) { - return self[oneKey]; - } - } - - return nil; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h deleted file mode 100755 index 26e97d0a2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// RSRSSParser.h -// RSParser -// -// Created by Brent Simmons on 
1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class ParserData; -@class RSParsedFeed; - -@interface RSRSSParser : NSObject - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData; - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m deleted file mode 100755 index 455320ab2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m +++ /dev/null @@ -1,523 +0,0 @@ -// -// RSRSSParser.m -// RSParser -// -// Created by Brent Simmons on 1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -#import "RSRSSParser.h" -#import "RSSAXParser.h" -#import "RSParsedFeed.h" -#import "RSParsedArticle.h" -#import "RSParserInternal.h" -#import "NSString+RSParser.h" -#import "RSDateParser.h" -#import "ParserData.h" -#import "RSParsedEnclosure.h" -#import "RSParsedAuthor.h" - - - -#import - - -@interface RSRSSParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) NSDictionary *currentAttributes; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic) BOOL parsingAuthor; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic) BOOL parsingChannelImage; -@property (nonatomic, readonly) NSDate *currentDate; -@property (nonatomic) BOOL endRSSFound; -@property (nonatomic) NSString *link; -@property (nonatomic) NSString *title; -@property (nonatomic) NSDate *dateParsed; -@property (nonatomic) BOOL isRDF; -@property (nonatomic) NSString *language; - -@end - - -@implementation RSRSSParser - -#pragma mark - Class Methods - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData { - - RSRSSParser *parser = [[[self class] alloc] initWithParserData:parserData]; - return [parser parseFeed]; -} - -#pragma mark - Init 
- -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - _feedData = parserData.data; - _urlString = parserData.url; - _parser = [[RSSAXParser alloc] initWithDelegate:self]; - _articles = [NSMutableArray new]; - - return self; -} - -#pragma mark - API - -- (RSParsedFeed *)parseFeed { - - [self parse]; - - RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.title link:self.link language:self.language articles:self.articles]; - - return parsedFeed; -} - - -#pragma mark - Constants - -static NSString *kIsPermaLinkKey = @"isPermaLink"; -static NSString *kURLKey = @"url"; -static NSString *kLengthKey = @"length"; -static NSString *kTypeKey = @"type"; -static NSString *kFalseValue = @"false"; -static NSString *kTrueValue = @"true"; -static NSString *kContentEncodedKey = @"content:encoded"; -static NSString *kDCDateKey = @"dc:date"; -static NSString *kDCCreatorKey = @"dc:creator"; -static NSString *kRDFAboutKey = @"rdf:about"; - -static const char *kItem = "item"; -static const NSInteger kItemLength = 5; - -static const char *kImage = "image"; -static const NSInteger kImageLength = 6; - -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; - -static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kDC = "dc"; -static const NSInteger kDCLength = 3; - -static const char *kCreator = "creator"; -static const NSInteger kCreatorLength = 8; - -static const char *kDate = "date"; -static const NSInteger kDateLength = 5; - -static const char *kContent = "content"; -static const NSInteger kContentLength = 8; - -static const char *kEncoded = "encoded"; -static const NSInteger kEncodedLength = 8; - -static const char *kGuid = "guid"; -static const NSInteger kGuidLength = 5; - -static const char *kPubDate = "pubDate"; -static const NSInteger kPubDateLength = 8; - -static const char *kAuthor = "author"; -static 
const NSInteger kAuthorLength = 7; - -static const char *kDescription = "description"; -static const NSInteger kDescriptionLength = 12; - -static const char *kRSS = "rss"; -static const NSInteger kRSSLength = 4; - -static const char *kURL = "url"; -static const NSInteger kURLLength = 4; - -static const char *kLength = "length"; -static const NSInteger kLengthLength = 7; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kIsPermaLink = "isPermaLink"; -static const NSInteger kIsPermaLinkLength = 12; - -static const char *kRDF = "rdf"; -static const NSInteger kRDFlength = 4; - -static const char *kAbout = "about"; -static const NSInteger kAboutLength = 6; - -static const char *kFalse = "false"; -static const NSInteger kFalseLength = 6; - -static const char *kTrue = "true"; -static const NSInteger kTrueLength = 5; - -static const char *kUppercaseRDF = "RDF"; -static const NSInteger kUppercaseRDFLength = 4; - -static const char *kEnclosure = "enclosure"; -static const NSInteger kEnclosureLength = 10; - -static const char *kLanguage = "language"; -static const NSInteger kLanguageLength = 9; - -#pragma mark - Parsing - -- (void)parse { - - self.dateParsed = [NSDate date]; - - @autoreleasepool { - [self.parser parseData:self.feedData]; - [self.parser finishParsing]; - } -} - - -- (void)addArticle { - - RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; - article.dateParsed = self.dateParsed; - - [self.articles addObject:article]; -} - - -- (RSParsedArticle *)currentArticle { - - return self.articles.lastObject; -} - - -- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - if (!self.link) { - self.link = [self currentString]; - } - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.title = [self currentString]; - } - - else if 
(RSSAXEqualTags(localName, kLanguage, kLanguageLength)) { - self.language = [self currentString]; - } -} - -- (void)addAuthorWithString:(NSString *)authorString { - - if (RSParserStringIsEmpty(authorString)) { - return; - } - - RSParsedAuthor *author = [RSParsedAuthor authorWithSingleString:[self currentString]]; - [self.currentArticle addAuthor:author]; -} - -- (void)addDCElement:(const xmlChar *)localName { - - if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) { - [self addAuthorWithString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kDate, kDateLength)) { - self.currentArticle.datePublished = self.currentDate; - } -} - - -- (void)addGuid { - - NSString *guid = [self currentString]; - self.currentArticle.guid = guid; - - NSString *isPermaLinkValue = [self.currentAttributes rsparser_objectForCaseInsensitiveKey:@"ispermalink"]; - if (!isPermaLinkValue || ![isPermaLinkValue isEqualToString:@"false"]) { - if ([self stringIsProbablyAURLOrRelativePath:guid]) { - self.currentArticle.permalink = [self urlString:guid]; - } - } -} - -- (void)addEnclosure { - - NSDictionary *attributes = self.currentAttributes; - NSString *url = attributes[kURLKey]; - if (!url || url.length < 1) { - return; - } - - RSParsedEnclosure *enclosure = [[RSParsedEnclosure alloc] init]; - enclosure.url = url; - enclosure.length = [attributes[kLengthKey] integerValue]; - enclosure.mimeType = attributes[kTypeKey]; - - [self.currentArticle addEnclosure:enclosure]; -} - -- (BOOL)stringIsProbablyAURLOrRelativePath:(NSString *)s { - - /*The RSS guid is defined as a permalink, except when it appears like this: - some—identifier - However, people often seem to think it’s *not* a permalink by default, even - though it is. So we try to detect the situation where the value is not a URL string, - and not even a relative path. 
This may need to evolve over time as we find - feeds broken in different ways.*/ - - if (![s rsparser_contains:@"/"]) { - // This seems to be just about the best possible check. - // Bad guids are often just integers, for instance. - return NO; - } - - if ([s.lowercaseString hasPrefix:@"tag:"]) { // A common non-URL guid form - return NO; - } - return YES; -} - -- (NSString *)urlString:(NSString *)s { - - /*Resolve against home page URL (if available) or feed URL.*/ - - if ([[s lowercaseString] hasPrefix:@"http"]) { - return s; - } - - if (!self.link) { - //TODO: get feed URL and use that to resolve URL.*/ - return s; - } - - NSURL *baseURL = [NSURL URLWithString:self.link]; - if (!baseURL) { - return s; - } - - NSURL *resolvedURL = [NSURL URLWithString:s relativeToURL:baseURL]; - if (resolvedURL.absoluteString) { - return resolvedURL.absoluteString; - } - - return s; -} - - -- (NSString *)currentString { - - return self.parser.currentStringWithTrimmedWhitespace; -} - - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (RSSAXEqualTags(prefix, kDC, kDCLength)) { - - [self addDCElement:localName]; - return; - } - - if (RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength)) { - NSString *s = [self currentString]; - if (!RSParserStringIsEmpty(s)) { - self.currentArticle.body = s; - } - return; - } - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kGuid, kGuidLength)) { - [self addGuid]; - } - else if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) { - self.currentArticle.datePublished = self.currentDate; - } - else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - [self addAuthorWithString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - self.currentArticle.link = [self urlString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) { - - if 
(!self.currentArticle.body) { - self.currentArticle.body = [self currentString]; - } - } - else if (!self.parsingAuthor && RSSAXEqualTags(localName, kTitle, kTitleLength)) { - NSString *articleTitle = [self currentString]; - if (articleTitle != nil) { - self.currentArticle.title = articleTitle; - } - } - else if (RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) { - [self addEnclosure]; - } -} - - -- (NSDate *)currentDate { - - return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); -} - - -#pragma mark - RSSAXParserDelegate - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (self.endRSSFound) { - return; - } - - if (RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) { - self.isRDF = YES; - return; - } - - NSDictionary *xmlAttributes = nil; - if ((self.isRDF && RSSAXEqualTags(localName, kItem, kItemLength)) || RSSAXEqualTags(localName, kGuid, kGuidLength) || RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) { - xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - } - if (self.currentAttributes != xmlAttributes) { - self.currentAttributes = xmlAttributes; - } - - if (!prefix && RSSAXEqualTags(localName, kItem, kItemLength)) { - - [self addArticle]; - self.parsingArticle = YES; - - if (self.isRDF && xmlAttributes && xmlAttributes[kRDFAboutKey]) { /*RSS 1.0 guid*/ - self.currentArticle.guid = xmlAttributes[kRDFAboutKey]; - self.currentArticle.permalink = self.currentArticle.guid; - } - } - - else if (!prefix && RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = YES; - } - else if (!prefix && 
RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - if (self.parsingArticle) { - self.parsingAuthor = true; - } - } - - if (!self.parsingChannelImage) { - [self.parser beginStoringCharacters]; - } -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (self.endRSSFound) { - return; - } - - if (self.isRDF && RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) { - self.endRSSFound = YES; - } - - else if (RSSAXEqualTags(localName, kRSS, kRSSLength)) { - self.endRSSFound = YES; - } - - else if (RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = NO; - } - - else if (RSSAXEqualTags(localName, kItem, kItemLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle) { - [self addArticleElement:localName prefix:prefix]; - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = NO; - } - } - - else if (!self.parsingChannelImage) { - [self addFeedElement:localName prefix:prefix]; - } -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) { - - if (RSSAXEqualTags(name, kAbout, kAboutLength)) { - return kRDFAboutKey; - } - - return nil; - } - - if (prefix) { - return nil; - } - - if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) { - return kIsPermaLinkKey; - } - - if (RSSAXEqualTags(name, kURL, kURLLength)) { - return kURLKey; - } - - if (RSSAXEqualTags(name, kLength, kLengthLength)) { - return kLengthKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - - return nil; -} - - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length 
{ - - static const NSUInteger falseLength = kFalseLength - 1; - static const NSUInteger trueLength = kTrueLength - 1; - - if (length == falseLength && equalBytes(bytes, kFalse, falseLength)) { - return kFalseValue; - } - - if (length == trueLength && equalBytes(bytes, kTrue, trueLength)) { - return kTrueValue; - } - - return nil; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h deleted file mode 100755 index f67d60cf6..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h +++ /dev/null @@ -1,55 +0,0 @@ -// -// RSSAXHTMLParser.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -@class RSSAXHTMLParser; - -@protocol RSSAXHTMLParserDelegate - -@optional - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char *_Nullable*_Nullable)attributes; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(nullable const unsigned char *)localName; - -// Length is guaranteed to be greater than 0. -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(nullable const unsigned char *)characters length:(NSUInteger)length; - -- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might). - -@end - - -@interface RSSAXHTMLParser : NSObject - - -- (instancetype)initWithDelegate:(id)delegate; - -- (void)parseData:(NSData *)data; -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; -- (void)finishParsing; -- (void)cancel; - -@property (nullable, nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded. -@property (nullable, nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters. 
-@property (nullable, nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - -- (void)beginStoringCharacters; // Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement. - -// Delegate can call from within XMLStartElement. - -- (nullable NSDictionary *)attributesDictionary:(const unsigned char *_Nullable*_Nullable)attributes; - - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m deleted file mode 100755 index 5df2d84fd..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m +++ /dev/null @@ -1,321 +0,0 @@ -// -// RSSAXHTMLParser.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" - -#import -#import -#import - - - -@interface RSSAXHTMLParser () - -@property (nonatomic) id delegate; -@property (nonatomic, assign) htmlParserCtxtPtr context; -@property (nonatomic, assign) BOOL storingCharacters; -@property (nonatomic) NSMutableData *characters; -@property (nonatomic) BOOL delegateRespondsToStartElementMethod; -@property (nonatomic) BOOL delegateRespondsToEndElementMethod; -@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; -@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; - -@end - - -@implementation RSSAXHTMLParser - - -+ (void)initialize { - - RSSAXInitLibXMLParser(); -} - - -#pragma mark - Init - -- (instancetype)initWithDelegate:(id)delegate { - - self = [super init]; - if (self == nil) - return nil; - - _delegate = delegate; - - if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]) { - _delegateRespondsToStartElementMethod = YES; - } - if ([_delegate 
respondsToSelector:@selector(saxParser:XMLEndElement:)]) { - _delegateRespondsToEndElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { - _delegateRespondsToCharactersFoundMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { - _delegateRespondsToEndOfDocumentMethod = YES; - } - - return self; -} - - -#pragma mark - Dealloc - -- (void)dealloc { - - if (_context != nil) { - htmlFreeParserCtxt(_context); - _context = nil; - } - _delegate = nil; -} - - -#pragma mark - API - -static xmlSAXHandler saxHandlerStruct; - -- (void)parseData:(NSData *)data { - - [self parseBytes:data.bytes numberOfBytes:data.length]; -} - - -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { - - if (self.context == nil) { - - xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes); - self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding); - htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT); - } - - @autoreleasepool { - htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); - } -} - - -- (void)finishParsing { - - NSAssert(self.context != nil, nil); - if (self.context == nil) - return; - - @autoreleasepool { - htmlParseChunk(self.context, nil, 0, 1); - htmlFreeParserCtxt(self.context); - self.context = nil; - self.characters = nil; - } -} - - -- (void)cancel { - - @autoreleasepool { - xmlStopParser(self.context); - } -} - - - -- (void)beginStoringCharacters { - self.storingCharacters = YES; - self.characters = [NSMutableData new]; -} - - -- (void)endStoringCharacters { - self.storingCharacters = NO; - self.characters = nil; -} - - -- (NSData *)currentCharacters { - - if (!self.storingCharacters) { - return nil; - } - - return self.characters; -} - - -- (NSString *)currentString { - - NSData *d = 
self.currentCharacters; - if (RSParserObjectIsEmpty(d)) { - return nil; - } - - return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; -} - - -- (NSString *)currentStringWithTrimmedWhitespace { - - return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; -} - - -#pragma mark - Attributes Dictionary - -- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes { - - if (!attributes) { - return nil; - } - - NSMutableDictionary *d = [NSMutableDictionary new]; - - NSInteger ix = 0; - NSString *currentKey = nil; - while (true) { - - const xmlChar *oneAttribute = attributes[ix]; - ix++; - - if (!currentKey && !oneAttribute) { - break; - } - - if (!currentKey) { - currentKey = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - else { - NSString *value = nil; - if (oneAttribute) { - value = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - - d[currentKey] = value ? value : @""; - currentKey = nil; - } - } - - return [d copy]; -} - - -#pragma mark - Callbacks - -- (void)xmlEndDocument { - - @autoreleasepool { - if (self.delegateRespondsToEndOfDocumentMethod) { - [self.delegate saxParserDidReachEndOfDocument:self]; - } - - [self endStoringCharacters]; - } -} - - -- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length { - - if (length < 1) { - return; - } - - @autoreleasepool { - if (self.storingCharacters) { - [self.characters appendBytes:(const void *)ch length:length]; - } - - if (self.delegateRespondsToCharactersFoundMethod) { - [self.delegate saxParser:self XMLCharactersFound:ch length:length]; - } - } -} - - -- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - @autoreleasepool { - if (self.delegateRespondsToStartElementMethod) { - - [self.delegate saxParser:self XMLStartElement:localName attributes:attributes]; - } - } -} - - -- (void)xmlEndElement:(const xmlChar *)localName { - - @autoreleasepool { 
- if (self.delegateRespondsToEndElementMethod) { - [self.delegate saxParser:self XMLEndElement:localName]; - } - - [self endStoringCharacters]; - } -} - - -@end - - -static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) { - - [(__bridge RSSAXHTMLParser *)context xmlStartElement:localname attributes:attributes]; -} - - -static void endElementSAX(void *context, const xmlChar *localname) { - [(__bridge RSSAXHTMLParser *)context xmlEndElement:localname]; -} - - -static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { - [(__bridge RSSAXHTMLParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; -} - - -static void endDocumentSAX(void *context) { - [(__bridge RSSAXHTMLParser *)context xmlEndDocument]; -} - - -static htmlSAXHandler saxHandlerStruct = { - nil, /* internalSubset */ - nil, /* isStandalone */ - nil, /* hasInternalSubset */ - nil, /* hasExternalSubset */ - nil, /* resolveEntity */ - nil, /* getEntity */ - nil, /* entityDecl */ - nil, /* notationDecl */ - nil, /* attributeDecl */ - nil, /* elementDecl */ - nil, /* unparsedEntityDecl */ - nil, /* setDocumentLocator */ - nil, /* startDocument */ - endDocumentSAX, /* endDocument */ - startElementSAX, /* startElement*/ - endElementSAX, /* endElement */ - nil, /* reference */ - charactersFoundSAX, /* characters */ - nil, /* ignorableWhitespace */ - nil, /* processingInstruction */ - nil, /* comment */ - nil, /* warning */ - nil, /* error */ - nil, /* fatalError //: unused error() get all the errors */ - nil, /* getParameterEntity */ - nil, /* cdataBlock */ - nil, /* externalSubset */ - XML_SAX2_MAGIC, - nil, - nil, /* startElementNs */ - nil, /* endElementNs */ - nil /* serror */ -}; - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.h deleted file mode 100755 index 80ca30a75..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.h +++ /dev/null @@ -1,69 +0,0 @@ -// 
-// RSSAXParser.h -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -/*Thread-safe, not re-entrant. - - Calls to the delegate will happen on the same thread where the parser runs. - - This is a low-level streaming XML parser, a thin wrapper for libxml2's SAX parser. It doesn't do much Foundation-ifying quite on purpose -- because the goal is performance and low memory use. - - This class is not meant to be sub-classed. Use the delegate methods. - */ - - -@class RSSAXParser; - -@protocol RSSAXParserDelegate - -@optional - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes; - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri; - -// Length is guaranteed to be greater than 0. -- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length; - -- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/ - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. Prefix may be nil.*/ - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length; - -@end - - -void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser. 
- -/*For use by delegate.*/ - -BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength); - - -@interface RSSAXParser : NSObject - -- (instancetype)initWithDelegate:(id)delegate; - -- (void)parseData:(NSData *)data; -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; -- (void)finishParsing; -- (void)cancel; - -@property (nonatomic, strong, readonly) NSData *currentCharacters; /*nil if not storing characters. UTF-8 encoded.*/ -@property (nonatomic, strong, readonly) NSString *currentString; /*Convenience to get string version of currentCharacters.*/ -@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - -- (void)beginStoringCharacters; /*Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.*/ - -/*Delegate can call from within XMLStartElement. Returns nil if numberOfAttributes < 1.*/ - -- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.m deleted file mode 100755 index 02d6988b8..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.m +++ /dev/null @@ -1,353 +0,0 @@ -// -// RSSAXParser.m -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSSAXParser.h" -#import "RSParserInternal.h" - -#import -#import -#import - - - -@interface RSSAXParser () - -@property (nonatomic, weak) id delegate; -@property (nonatomic, assign) xmlParserCtxtPtr context; -@property (nonatomic, assign) BOOL storingCharacters; -@property (nonatomic) NSMutableData *characters; -@property (nonatomic) BOOL delegateRespondsToInternedStringMethod; -@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod; -@property (nonatomic) BOOL delegateRespondsToStartElementMethod; -@property (nonatomic) BOOL delegateRespondsToEndElementMethod; -@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; -@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; - -@end - - -@implementation RSSAXParser - -+ (void)initialize { - - RSSAXInitLibXMLParser(); -} - - -#pragma mark - Init - -- (instancetype)initWithDelegate:(id)delegate { - - self = [super init]; - if (self == nil) - return nil; - - _delegate = delegate; - - if ([_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)]) { - _delegateRespondsToInternedStringMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)]) { - _delegateRespondsToInternedStringForValueMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)]) { - _delegateRespondsToStartElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)]) { - _delegateRespondsToEndElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { - _delegateRespondsToCharactersFoundMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { - _delegateRespondsToEndOfDocumentMethod = YES; - } - - return self; -} - - -#pragma mark - Dealloc - -- (void)dealloc { - if 
(_context != nil) { - xmlFreeParserCtxt(_context); - _context = nil; - } - _delegate = nil; -} - - -#pragma mark - API - -static xmlSAXHandler saxHandlerStruct; - -- (void)parseData:(NSData *)data { - - [self parseBytes:data.bytes numberOfBytes:data.length]; -} - - -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { - - if (self.context == nil) { - - self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil); - xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT); - } - - @autoreleasepool { - xmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); - } -} - - -- (void)finishParsing { - - NSAssert(self.context != nil, nil); - if (self.context == nil) - return; - - @autoreleasepool { - xmlParseChunk(self.context, nil, 0, 1); - xmlFreeParserCtxt(self.context); - self.context = nil; - self.characters = nil; - } -} - - -- (void)cancel { - - @autoreleasepool { - xmlStopParser(self.context); - } -} - - -- (void)beginStoringCharacters { - self.storingCharacters = YES; - self.characters = [NSMutableData new]; -} - - -- (void)endStoringCharacters { - self.storingCharacters = NO; - self.characters = nil; -} - - -- (NSData *)currentCharacters { - - if (!self.storingCharacters) { - return nil; - } - - return self.characters; -} - - -- (NSString *)currentString { - - NSData *d = self.currentCharacters; - if (RSParserObjectIsEmpty(d)) { - return nil; - } - - return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; -} - - -- (NSString *)currentStringWithTrimmedWhitespace { - - return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; -} - - -#pragma mark - Attributes Dictionary - -- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes { - - if (numberOfAttributes < 1 || !attributes) { - return nil; - } - - NSMutableDictionary *d = [NSMutableDictionary new]; 
- - @autoreleasepool { - NSInteger i = 0, j = 0; - for (i = 0, j = 0; i < numberOfAttributes; i++, j+=5) { - - NSUInteger lenValue = (NSUInteger)(attributes[j + 4] - attributes[j + 3]); - NSString *value = nil; - - if (self.delegateRespondsToInternedStringForValueMethod) { - value = [self.delegate saxParser:self internedStringForValue:(const void *)attributes[j + 3] length:lenValue]; - } - if (!value) { - value = [[NSString alloc] initWithBytes:(const void *)attributes[j + 3] length:lenValue encoding:NSUTF8StringEncoding]; - } - - NSString *attributeName = nil; - - if (self.delegateRespondsToInternedStringMethod) { - attributeName = [self.delegate saxParser:self internedStringForName:(const xmlChar *)attributes[j] prefix:(const xmlChar *)attributes[j + 1]]; - } - - if (!attributeName) { - attributeName = [NSString stringWithUTF8String:(const char *)attributes[j]]; - if (attributes[j + 1]) { - NSString *attributePrefix = [NSString stringWithUTF8String:(const char *)attributes[j + 1]]; - attributeName = [NSString stringWithFormat:@"%@:%@", attributePrefix, attributeName]; - } - } - - if (value && attributeName) { - d[attributeName] = value; - } - } - } - - return d; -} - - -#pragma mark - Equal Tags - -BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) { - - if (!localName) { - return NO; - } - return !strncmp((const char *)localName, tag, (size_t)tagLength); -} - - -#pragma mark - Callbacks - -- (void)xmlEndDocument { - - @autoreleasepool { - if (self.delegateRespondsToEndOfDocumentMethod) { - [self.delegate saxParserDidReachEndOfDocument:self]; - } - - [self endStoringCharacters]; - } -} - - -- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length { - - if (length < 1) { - return; - } - - @autoreleasepool { - if (self.storingCharacters) { - [self.characters appendBytes:(const void *)ch length:length]; - } - - if (self.delegateRespondsToCharactersFoundMethod) { - [self.delegate saxParser:self XMLCharactersFound:ch 
length:length]; - } - } -} - - -- (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - @autoreleasepool { - if (self.delegateRespondsToStartElementMethod) { - - [self.delegate saxParser:self XMLStartElement:localName prefix:prefix uri:uri numberOfNamespaces:numberOfNamespaces namespaces:namespaces numberOfAttributes:numberOfAttributes numberDefaulted:numberDefaulted attributes:attributes]; - } - } -} - - -- (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - @autoreleasepool { - if (self.delegateRespondsToEndElementMethod) { - [self.delegate saxParser:self XMLEndElement:localName prefix:prefix uri:uri]; - } - - [self endStoringCharacters]; - } -} - - -@end - - -static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) { - - [(__bridge RSSAXParser *)context xmlStartElement:localname prefix:prefix uri:URI numberOfNamespaces:nb_namespaces namespaces:namespaces numberOfAttributes:nb_attributes numberDefaulted:nb_defaulted attributes:attributes]; -} - - -static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) { - [(__bridge RSSAXParser *)context xmlEndElement:localname prefix:prefix uri:URI]; -} - - -static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { - [(__bridge RSSAXParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; -} - - -static void endDocumentSAX(void *context) { - [(__bridge RSSAXParser *)context xmlEndDocument]; -} - - -static xmlSAXHandler saxHandlerStruct = { - nil, /* internalSubset */ - nil, 
/* isStandalone */ - nil, /* hasInternalSubset */ - nil, /* hasExternalSubset */ - nil, /* resolveEntity */ - nil, /* getEntity */ - nil, /* entityDecl */ - nil, /* notationDecl */ - nil, /* attributeDecl */ - nil, /* elementDecl */ - nil, /* unparsedEntityDecl */ - nil, /* setDocumentLocator */ - nil, /* startDocument */ - endDocumentSAX, /* endDocument */ - nil, /* startElement*/ - nil, /* endElement */ - nil, /* reference */ - charactersFoundSAX, /* characters */ - nil, /* ignorableWhitespace */ - nil, /* processingInstruction */ - nil, /* comment */ - nil, /* warning */ - nil, /* error */ - nil, /* fatalError //: unused error() get all the errors */ - nil, /* getParameterEntity */ - nil, /* cdataBlock */ - nil, /* externalSubset */ - XML_SAX2_MAGIC, - nil, - startElementSAX, /* startElementNs */ - endElementSAX, /* endElementNs */ - nil /* serror */ -}; - - -void RSSAXInitLibXMLParser(void) { - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - xmlInitParser(); - }); -} - diff --git a/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h b/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h deleted file mode 100644 index c9bd0008f..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h +++ /dev/null @@ -1,56 +0,0 @@ -// -// RSParser.h -// RSParser -// -// Created by Brent Simmons on 6/20/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -#import "../ParserData.h" -#import "../RSDateParser.h" - -// OPML - -#import "../RSOPMLParser.h" -#import "../RSOPMLDocument.h" -#import "../RSOPMLItem.h" -#import "../RSOPMLAttributes.h" -#import "../RSOPMLFeedSpecifier.h" -#import "../RSOPMLError.h" - -// For writing your own XML parser. - -#import "../RSSAXParser.h" - -// You should use FeedParser (Swift) instead of these two specific parsers -// and the objects they create. -// But they’re available if you want them. 
- -#import "../RSRSSParser.h" -#import "../RSAtomParser.h" -#import "../RSParsedFeed.h" -#import "../RSParsedArticle.h" -#import "../RSParsedEnclosure.h" -#import "../RSParsedAuthor.h" - -// HTML - -#import "../RSHTMLMetadataParser.h" -#import "../RSHTMLMetadata.h" -#import "../RSHTMLLinkParser.h" -#import "../RSSAXHTMLParser.h" // For writing your own HTML parser. -#import "../RSHTMLTag.h" - -// Utilities - -#import "../NSData+RSParser.h" -#import "../NSString+RSParser.h" - - - - - - diff --git a/NetNewsWire.xcodeproj/project.pbxproj b/NetNewsWire.xcodeproj/project.pbxproj index 18d63ad1d..6b0474782 100644 --- a/NetNewsWire.xcodeproj/project.pbxproj +++ b/NetNewsWire.xcodeproj/project.pbxproj @@ -385,12 +385,8 @@ 8454C3F8263F3AD400E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; }; 8456116B2BBD145200507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; }; 8456116C2BBD145200507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; - 8456116E2BBD145200507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; }; - 8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 845611712BBD145D00507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; }; 845611722BBD145D00507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; - 845611742BBD145D00507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; }; - 
845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 845A29221FC9251E007B49E3 /* SidebarCellLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29211FC9251E007B49E3 /* SidebarCellLayout.swift */; }; 845A29241FC9255E007B49E3 /* SidebarCellAppearance.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29231FC9255E007B49E3 /* SidebarCellAppearance.swift */; }; 845EE7B11FC2366500854A1F /* StarredFeedDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845EE7B01FC2366500854A1F /* StarredFeedDelegate.swift */; }; @@ -494,7 +490,6 @@ 84DC5FFE2BCE37A300F04682 /* AppDelegate+Shared.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84DC5FFD2BCE37A300F04682 /* AppDelegate+Shared.swift */; }; 84DC60002BCE37A300F04682 /* AppDelegate+Shared.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84DC5FFD2BCE37A300F04682 /* AppDelegate+Shared.swift */; }; 84DC60022BCE40B200F04682 /* Images in Frameworks */ = {isa = PBXBuildFile; productRef = 84DC60012BCE40B200F04682 /* Images */; }; - 84DC60042BCE40D000F04682 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 84DC60032BCE40D000F04682 /* ParserObjC */; }; 84DCA5122BABB75600792720 /* FoundationExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA5112BABB75600792720 /* FoundationExtras */; }; 84DCA5142BABB76100792720 /* AppKitExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA5132BABB76100792720 /* AppKitExtras */; }; 84DCA51E2BABB79900792720 /* FoundationExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA51D2BABB79900792720 /* FoundationExtras */; }; @@ -686,7 +681,6 @@ 8426DBC02BFDAEF200E98109 /* Web in Embed Frameworks */, 513F32782593EE6F0003048F /* Secrets in Embed Frameworks */, 513F327B2593EE6F0003048F /* SyncDatabase in Embed Frameworks */, - 845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */, 
513F32722593EE6F0003048F /* Articles in Embed Frameworks */, 513F32812593EF180003048F /* Account in Embed Frameworks */, 8426DBB92BFDAD9200E98109 /* Core in Embed Frameworks */, @@ -741,7 +735,6 @@ 8426DBC32BFDAEFC00E98109 /* Web in Embed Frameworks */, 513277442590FBB60064F1E7 /* Account in Embed Frameworks */, 5132775F2590FC640064F1E7 /* Articles in Embed Frameworks */, - 8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */, 513277662590FC780064F1E7 /* Secrets in Embed Frameworks */, 513277652590FC640064F1E7 /* SyncDatabase in Embed Frameworks */, 8426DBB82BFDAD8500E98109 /* Core in Embed Frameworks */, @@ -1097,7 +1090,6 @@ 84A059EE2C3A4A570041209B /* FMDB */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = FMDB; sourceTree = ""; }; 84A059EF2C3A4A5B0041209B /* Web */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Web; sourceTree = ""; }; 84A059F02C3A4A5F0041209B /* Parser */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Parser; sourceTree = ""; }; - 84A059F12C3A4A620041209B /* ParserObjC */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = ParserObjC; sourceTree = ""; }; 84A059F22C3A4A670041209B /* Core */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Core; sourceTree = ""; }; 84A059F32C3A4A6C0041209B /* Tree */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Tree; sourceTree = ""; }; 84A059F42C3A4AA30041209B /* UIKitExtras */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = UIKitExtras; sourceTree = ""; }; @@ -1244,7 +1236,6 @@ 841CECDE2BAD06D10001EE72 /* Tree in Frameworks */, 51BC2F3824D3439A00E90810 /* Account in Frameworks */, 8426DBC82BFDAF4300E98109 /* Web in Frameworks */, - 84DC60042BCE40D000F04682 /* ParserObjC in Frameworks */, 84D9582C2BABE53B0053E7B2 /* FoundationExtras in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1291,7 +1282,6 @@ 84C1A8582BBBA5BD006E3E96 /* Web in Frameworks */, 516B695F24D2F33B00B5702F /* Account 
in Frameworks */, 84A699152BC34F3D00605AB8 /* ArticleExtractor in Frameworks */, - 845611742BBD145D00507B73 /* ParserObjC in Frameworks */, 845611712BBD145D00507B73 /* Parser in Frameworks */, 513F32712593EE6F0003048F /* Articles in Frameworks */, 513F32772593EE6F0003048F /* Secrets in Frameworks */, @@ -1329,7 +1319,6 @@ 841CECD82BAD04B20001EE72 /* Tree in Frameworks */, 8426DBC22BFDAEFC00E98109 /* Web in Frameworks */, 8456116B2BBD145200507B73 /* Parser in Frameworks */, - 8456116E2BBD145200507B73 /* ParserObjC in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2052,7 +2041,6 @@ 84A059EE2C3A4A570041209B /* FMDB */, 84A059EF2C3A4A5B0041209B /* Web */, 84A059F02C3A4A5F0041209B /* Parser */, - 84A059F12C3A4A620041209B /* ParserObjC */, 84A059F22C3A4A670041209B /* Core */, 84A059F32C3A4A6C0041209B /* Tree */, 84A059F42C3A4AA30041209B /* UIKitExtras */, @@ -2493,7 +2481,6 @@ 84D9582B2BABE53B0053E7B2 /* FoundationExtras */, 841CECDD2BAD06D10001EE72 /* Tree */, 84DC60012BCE40B200F04682 /* Images */, - 84DC60032BCE40D000F04682 /* ParserObjC */, 8426DBC72BFDAF4300E98109 /* Web */, ); productName = "NetNewsWire iOS Share Extension"; @@ -2608,7 +2595,6 @@ 841CECDB2BAD04BF0001EE72 /* Tree */, 84C1A8572BBBA5BD006E3E96 /* Web */, 845611702BBD145D00507B73 /* Parser */, - 845611732BBD145D00507B73 /* ParserObjC */, 8410C4A42BC1E28200D4F799 /* ReaderAPI */, 84A699142BC34F3D00605AB8 /* ArticleExtractor */, 84DC5FFB2BCE31DB00F04682 /* Images */, @@ -2657,7 +2643,6 @@ 8438C2DA2BABE0B00040C9EE /* CoreResources */, 841CECD72BAD04B20001EE72 /* Tree */, 8456116A2BBD145200507B73 /* Parser */, - 8456116D2BBD145200507B73 /* ParserObjC */, 84A699162BC34F4400605AB8 /* ArticleExtractor */, 84DC5FF92BCE31D200F04682 /* Images */, 8426DBC12BFDAEFC00E98109 /* Web */, @@ -4200,18 +4185,10 @@ isa = XCSwiftPackageProductDependency; productName = Parser; }; - 8456116D2BBD145200507B73 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 
845611702BBD145D00507B73 /* Parser */ = { isa = XCSwiftPackageProductDependency; productName = Parser; }; - 845611732BBD145D00507B73 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 8479ABE22B9E906E00F84C4D /* Database */ = { isa = XCSwiftPackageProductDependency; productName = Database; @@ -4260,10 +4237,6 @@ isa = XCSwiftPackageProductDependency; productName = Images; }; - 84DC60032BCE40D000F04682 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 84DCA5112BABB75600792720 /* FoundationExtras */ = { isa = XCSwiftPackageProductDependency; productName = FoundationExtras; diff --git a/Shared/AppDelegate+Shared.swift b/Shared/AppDelegate+Shared.swift index 626ff7f2e..02484da2a 100644 --- a/Shared/AppDelegate+Shared.swift +++ b/Shared/AppDelegate+Shared.swift @@ -8,8 +8,8 @@ import Foundation import Images -import ParserObjC import Account +import Parser extension AppDelegate: FaviconDownloaderDelegate, FeedIconDownloaderDelegate { @@ -17,7 +17,7 @@ extension AppDelegate: FaviconDownloaderDelegate, FeedIconDownloaderDelegate { IconImage.appIcon } - func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata? { + func downloadMetadata(_ url: String) async throws -> HTMLMetadata? 
{ await HTMLMetadataDownloader.downloadMetadata(for: url) } diff --git a/Shared/Extensions/ArticleStringFormatter.swift b/Shared/Extensions/ArticleStringFormatter.swift index a0d0c11e7..4d0484d08 100644 --- a/Shared/Extensions/ArticleStringFormatter.swift +++ b/Shared/Extensions/ArticleStringFormatter.swift @@ -66,7 +66,7 @@ import Parser s = s.replacingOccurrences(of: "\t", with: "") if !forHTML { - s = s.rsparser_stringByDecodingHTMLEntities() + s = HTMLEntityDecoder.decodedString(s) } s = s.trimmingWhitespace @@ -98,8 +98,9 @@ import Parser if let cachedBody = summaryCache[key] { return cachedBody } - var s = body.rsparser_stringByDecodingHTMLEntities() + var s = body s = s.strippingHTML(maxCharacters: 250) + s = HTMLEntityDecoder.decodedString(s) s = s.trimmingWhitespace s = s.collapsingWhitespace if s == "Comments" { // Hacker News. diff --git a/Shared/Extensions/NSAttributedString+NetNewsWire.swift b/Shared/Extensions/NSAttributedString+NetNewsWire.swift index d5b29c347..3e852d694 100644 --- a/Shared/Extensions/NSAttributedString+NetNewsWire.swift +++ b/Shared/Extensions/NSAttributedString+NetNewsWire.swift @@ -289,6 +289,6 @@ private struct CountedSet where Element: Hashable { private extension String { var decodedEntity: String { // It's possible the implementation will change, but for now it just calls this. 
- (self as NSString).rsparser_stringByDecodingHTMLEntities() as String + HTMLEntityDecoder.decodedString(self) } } diff --git a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift index da78fee44..ce6b88176 100644 --- a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift +++ b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift @@ -9,13 +9,12 @@ import Foundation import Web import Parser -import ParserObjC -extension RSHTMLMetadata: @unchecked Sendable {} +extension HTMLMetadata: @unchecked Sendable {} struct HTMLMetadataDownloader { - @MainActor static func downloadMetadata(for url: String) async -> RSHTMLMetadata? { + @MainActor static func downloadMetadata(for url: String) async -> HTMLMetadata? { guard let actualURL = URL(string: url) else { return nil @@ -34,10 +33,10 @@ struct HTMLMetadataDownloader { return nil } - @MainActor private static func parseMetadata(with parserData: ParserData) async -> RSHTMLMetadata? { + @MainActor private static func parseMetadata(with parserData: ParserData) async -> HTMLMetadata? { - let task = Task.detached { () -> RSHTMLMetadata? in - RSHTMLMetadataParser.htmlMetadata(with: parserData) + let task = Task.detached { () -> HTMLMetadata? in + HTMLMetadataParser.metadata(with: parserData) } return await task.value diff --git a/iOS/AppDelegate.swift b/iOS/AppDelegate.swift index 7a6dc68a7..56bda163d 100644 --- a/iOS/AppDelegate.swift +++ b/iOS/AppDelegate.swift @@ -55,6 +55,8 @@ class AppDelegate: UIResponder, UIApplicationDelegate, UNUserNotificationCenterD override init() { + xmlInitParser() + super.init() appDelegate = self