Merge branch 'SwiftParser'
This commit is contained in:
commit
01e7a66b78
@ -17,6 +17,7 @@ import OSLog
|
||||
import Core
|
||||
import CrashReporter
|
||||
import Images
|
||||
import libxml2
|
||||
|
||||
// If we're not going to import Sparkle, provide dummy protocols to make it easy
|
||||
// for AppDelegate to comply
|
||||
@ -104,6 +105,8 @@ import Sparkle
|
||||
|
||||
override init() {
|
||||
|
||||
xmlInitParser()
|
||||
|
||||
NSWindow.allowsAutomaticWindowTabbing = false
|
||||
super.init()
|
||||
|
||||
|
@ -12,7 +12,6 @@ let package = Package(
|
||||
],
|
||||
dependencies: [
|
||||
.package(path: "../Parser"),
|
||||
.package(path: "../ParserObjC"),
|
||||
.package(path: "../Articles"),
|
||||
.package(path: "../ArticlesDatabase"),
|
||||
.package(path: "../Web"),
|
||||
@ -35,7 +34,6 @@ let package = Package(
|
||||
name: "Account",
|
||||
dependencies: [
|
||||
"Parser",
|
||||
"ParserObjC",
|
||||
"Web",
|
||||
"Articles",
|
||||
"ArticlesDatabase",
|
||||
|
@ -13,7 +13,6 @@ import UIKit
|
||||
import Foundation
|
||||
import Articles
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Database
|
||||
import ArticlesDatabase
|
||||
import Web
|
||||
@ -465,14 +464,14 @@ public enum FetchType {
|
||||
delegate.accountWillBeDeleted(self)
|
||||
}
|
||||
|
||||
func addOPMLItems(_ items: [RSOPMLItem]) {
|
||||
func addOPMLItems(_ items: [OPMLItem]) {
|
||||
for item in items {
|
||||
if let feedSpecifier = item.feedSpecifier {
|
||||
addFeed(newFeed(with: feedSpecifier))
|
||||
} else {
|
||||
if let title = item.titleFromAttributes, let folder = ensureFolder(with: title) {
|
||||
folder.externalID = item.attributes?["nnw_externalID"] as? String
|
||||
if let children = item.children {
|
||||
if let children = item.items {
|
||||
for itemChild in children {
|
||||
if let feedSpecifier = itemChild.feedSpecifier {
|
||||
folder.addFeed(newFeed(with: feedSpecifier))
|
||||
@ -484,7 +483,7 @@ public enum FetchType {
|
||||
}
|
||||
}
|
||||
|
||||
func loadOPMLItems(_ items: [RSOPMLItem]) {
|
||||
func loadOPMLItems(_ items: [OPMLItem]) {
|
||||
addOPMLItems(OPMLNormalizer.normalize(items))
|
||||
}
|
||||
|
||||
@ -561,7 +560,7 @@ public enum FetchType {
|
||||
return folders?.first(where: { $0.externalID == externalID })
|
||||
}
|
||||
|
||||
func newFeed(with opmlFeedSpecifier: RSOPMLFeedSpecifier) -> Feed {
|
||||
func newFeed(with opmlFeedSpecifier: OPMLFeedSpecifier) -> Feed {
|
||||
let feedURL = opmlFeedSpecifier.feedURL
|
||||
let metadata = feedMetadata(feedURL: feedURL, feedID: feedURL)
|
||||
let feed = Feed(account: self, url: opmlFeedSpecifier.feedURL, metadata: metadata)
|
||||
|
@ -12,7 +12,6 @@ import SystemConfiguration
|
||||
import os.log
|
||||
import SyncDatabase
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Articles
|
||||
import ArticlesDatabase
|
||||
import Web
|
||||
@ -176,9 +175,9 @@ enum CloudKitAccountDelegateError: LocalizedError {
|
||||
|
||||
let opmlData = try Data(contentsOf: opmlFile)
|
||||
let parserData = ParserData(url: opmlFile.absoluteString, data: opmlData)
|
||||
let opmlDocument = try RSOPMLParser.parseOPML(with: parserData)
|
||||
let opmlDocument = OPMLParser.document(with: parserData)
|
||||
|
||||
guard let opmlItems = opmlDocument.children, let rootExternalID = account.externalID else {
|
||||
guard let opmlItems = opmlDocument?.items, let rootExternalID = account.externalID else {
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,6 @@
|
||||
import Foundation
|
||||
import os.log
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Articles
|
||||
import ArticlesDatabase
|
||||
import Web
|
||||
@ -78,8 +77,8 @@ final class LocalAccountDelegate: AccountDelegate {
|
||||
let opmlData = try Data(contentsOf: opmlFile)
|
||||
let parserData = ParserData(url: opmlFile.absoluteString, data: opmlData)
|
||||
|
||||
let opmlDocument = try RSOPMLParser.parseOPML(with: parserData)
|
||||
guard let children = opmlDocument.children else {
|
||||
let opmlDocument = OPMLParser.document(with: parserData)
|
||||
guard let children = opmlDocument?.items else {
|
||||
return
|
||||
}
|
||||
|
||||
@ -268,9 +267,7 @@ private extension LocalAccountDelegate {
|
||||
return
|
||||
}
|
||||
|
||||
let parserData = ParserData(url: feed.url, data: data)
|
||||
|
||||
guard let parsedFeed = try? await FeedParser.parse(parserData) else {
|
||||
guard let parsedFeed = try? await FeedParser.parseAsync(urlString: feed.url, data: data) else {
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,6 @@
|
||||
import Foundation
|
||||
import os
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Core
|
||||
|
||||
@MainActor final class OPMLFile {
|
||||
@ -34,7 +33,7 @@ import Core
|
||||
dataFile.markAsDirty()
|
||||
}
|
||||
|
||||
func opmlItems() -> [RSOPMLItem]? {
|
||||
func opmlItems() -> [OPMLItem]? {
|
||||
guard let fileData = opmlFileData() else {
|
||||
return nil
|
||||
}
|
||||
@ -62,18 +61,10 @@ private extension OPMLFile {
|
||||
return fileData
|
||||
}
|
||||
|
||||
func parsedOPMLItems(fileData: Data) -> [RSOPMLItem]? {
|
||||
func parsedOPMLItems(fileData: Data) -> [OPMLItem]? {
|
||||
let parserData = ParserData(url: fileURL.absoluteString, data: fileData)
|
||||
var opmlDocument: RSOPMLDocument?
|
||||
|
||||
do {
|
||||
opmlDocument = try RSOPMLParser.parseOPML(with: parserData)
|
||||
} catch {
|
||||
logger.error("OPML Import failed for \(self.fileURL): \(error.localizedDescription)")
|
||||
return nil
|
||||
}
|
||||
|
||||
return opmlDocument?.children
|
||||
let opmlDocument = OPMLParser.document(with: parserData)
|
||||
return opmlDocument?.items
|
||||
}
|
||||
|
||||
func opmlDocument() -> String {
|
||||
|
@ -8,20 +8,19 @@
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
import ParserObjC
|
||||
|
||||
final class OPMLNormalizer {
|
||||
|
||||
var normalizedOPMLItems = [RSOPMLItem]()
|
||||
var normalizedOPMLItems = [OPMLItem]()
|
||||
|
||||
static func normalize(_ items: [RSOPMLItem]) -> [RSOPMLItem] {
|
||||
static func normalize(_ items: [OPMLItem]) -> [OPMLItem] {
|
||||
let opmlNormalizer = OPMLNormalizer()
|
||||
opmlNormalizer.normalize(items)
|
||||
return opmlNormalizer.normalizedOPMLItems
|
||||
}
|
||||
|
||||
private func normalize(_ items: [RSOPMLItem], parentFolder: RSOPMLItem? = nil) {
|
||||
var feedsToAdd = [RSOPMLItem]()
|
||||
private func normalize(_ items: [OPMLItem], parentFolder: OPMLItem? = nil) {
|
||||
var feedsToAdd = [OPMLItem]()
|
||||
|
||||
for item in items {
|
||||
|
||||
@ -34,14 +33,14 @@ final class OPMLNormalizer {
|
||||
|
||||
guard let _ = item.titleFromAttributes else {
|
||||
// Folder doesn’t have a name, so it won’t be created, and its items will go one level up.
|
||||
if let itemChildren = item.children {
|
||||
if let itemChildren = item.items {
|
||||
normalize(itemChildren, parentFolder: parentFolder)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
feedsToAdd.append(item)
|
||||
if let itemChildren = item.children {
|
||||
if let itemChildren = item.items {
|
||||
if let parentFolder = parentFolder {
|
||||
normalize(itemChildren, parentFolder: parentFolder)
|
||||
} else {
|
||||
@ -52,8 +51,8 @@ final class OPMLNormalizer {
|
||||
|
||||
if let parentFolder = parentFolder {
|
||||
for feed in feedsToAdd {
|
||||
if !(parentFolder.children?.contains(where: { $0.feedSpecifier?.feedURL == feed.feedSpecifier?.feedURL}) ?? false) {
|
||||
parentFolder.addChild(feed)
|
||||
if !(parentFolder.items?.contains(where: { $0.feedSpecifier?.feedURL == feed.feedSpecifier?.feedURL}) ?? false) {
|
||||
parentFolder.add(feed)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -33,7 +33,7 @@ final class ArticleSearchInfo: Hashable {
|
||||
}
|
||||
|
||||
lazy var bodyForIndex: String = {
|
||||
let s = preferredText.rsparser_stringByDecodingHTMLEntities()
|
||||
let s = HTMLEntityDecoder.decodedString(preferredText)
|
||||
let sanitizedBody = s.strippingHTML().collapsingWhitespace
|
||||
|
||||
if let authorsNames = authorsNames {
|
||||
|
@ -10,7 +10,6 @@ import Foundation
|
||||
import os.log
|
||||
import Web
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import CloudKit
|
||||
import FoundationExtras
|
||||
|
||||
@ -57,12 +56,12 @@ enum CloudKitAccountZoneError: LocalizedError {
|
||||
migrateChangeToken()
|
||||
}
|
||||
|
||||
public func importOPML(rootExternalID: String, items: [RSOPMLItem]) async throws {
|
||||
public func importOPML(rootExternalID: String, items: [OPMLItem]) async throws {
|
||||
|
||||
var records = [CKRecord]()
|
||||
var feedRecords = [String: CKRecord]()
|
||||
|
||||
func processFeed(feedSpecifier: RSOPMLFeedSpecifier, containerExternalID: String) {
|
||||
func processFeed(feedSpecifier: OPMLFeedSpecifier, containerExternalID: String) {
|
||||
if let feedRecord = feedRecords[feedSpecifier.feedURL], var containerExternalIDs = feedRecord[CloudKitFeed.Fields.containerExternalIDs] as? [String] {
|
||||
containerExternalIDs.append(containerExternalID)
|
||||
feedRecord[CloudKitFeed.Fields.containerExternalIDs] = containerExternalIDs
|
||||
@ -80,7 +79,7 @@ enum CloudKitAccountZoneError: LocalizedError {
|
||||
if let title = item.titleFromAttributes {
|
||||
let containerRecord = newContainerCKRecord(name: title)
|
||||
records.append(containerRecord)
|
||||
item.children?.forEach { itemChild in
|
||||
item.items?.forEach { itemChild in
|
||||
if let feedSpecifier = itemChild.feedSpecifier {
|
||||
processFeed(feedSpecifier: feedSpecifier, containerExternalID: containerRecord.externalID)
|
||||
}
|
||||
@ -289,7 +288,7 @@ enum CloudKitAccountZoneError: LocalizedError {
|
||||
|
||||
private extension CloudKitAccountZone {
|
||||
|
||||
func newFeedCKRecord(feedSpecifier: RSOPMLFeedSpecifier, containerExternalID: String) -> CKRecord {
|
||||
func newFeedCKRecord(feedSpecifier: OPMLFeedSpecifier, containerExternalID: String) -> CKRecord {
|
||||
|
||||
let record = CKRecord(recordType: CloudKitFeed.recordType, recordID: generateRecordID())
|
||||
record[CloudKitFeed.Fields.url] = feedSpecifier.feedURL
|
||||
|
@ -9,7 +9,6 @@
|
||||
import Foundation
|
||||
import os.log
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Web
|
||||
import CloudKit
|
||||
import Articles
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Web
|
||||
import CommonErrors
|
||||
import os.log
|
||||
@ -56,7 +55,7 @@ public final class FeedFinder {
|
||||
throw AccountError.createErrorNotFound
|
||||
}
|
||||
|
||||
if FeedFinder.isFeed(data, url.absoluteString) {
|
||||
if FeedFinder.isFeed(data) {
|
||||
logger.info("FeedFinder: is feed \(url)")
|
||||
let feedSpecifier = FeedSpecifier(title: nil, urlString: url.absoluteString, source: .UserEntered, orderFound: 1)
|
||||
return Set([feedSpecifier])
|
||||
@ -157,7 +156,7 @@ private extension FeedFinder {
|
||||
|
||||
if let downloadData = try? await DownloadWithCacheManager.shared.download(url) {
|
||||
if let data = downloadData.data, let response = downloadData.response, response.statusIsOK {
|
||||
if isFeed(data, downloadFeedSpecifier.urlString) {
|
||||
if isFeed(data) {
|
||||
addFeedSpecifier(downloadFeedSpecifier, feedSpecifiers: &resultFeedSpecifiers)
|
||||
}
|
||||
}
|
||||
@ -167,8 +166,7 @@ private extension FeedFinder {
|
||||
return Set(resultFeedSpecifiers.values)
|
||||
}
|
||||
|
||||
static func isFeed(_ data: Data, _ urlString: String) -> Bool {
|
||||
let parserData = ParserData(url: urlString, data: data)
|
||||
return FeedParser.canParse(parserData)
|
||||
static func isFeed(_ data: Data) -> Bool {
|
||||
return FeedParser.canParse(data)
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,6 @@
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
import Parser
|
||||
import ParserObjC
|
||||
|
||||
private let feedURLWordsToMatch = ["feed", "xml", "rss", "atom", "json"]
|
||||
|
||||
@ -22,18 +21,20 @@ class HTMLFeedFinder {
|
||||
private var feedSpecifiersDictionary = [String: FeedSpecifier]()
|
||||
|
||||
init(parserData: ParserData) {
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: parserData)
|
||||
let metadata = HTMLMetadataParser.metadata(with: parserData)
|
||||
var orderFound = 0
|
||||
|
||||
for oneFeedLink in metadata.feedLinks {
|
||||
if let oneURLString = oneFeedLink.urlString?.normalizedURL {
|
||||
orderFound = orderFound + 1
|
||||
let oneFeedSpecifier = FeedSpecifier(title: oneFeedLink.title, urlString: oneURLString, source: .HTMLHead, orderFound: orderFound)
|
||||
addFeedSpecifier(oneFeedSpecifier)
|
||||
if let feedLinks = metadata.feedLinks {
|
||||
for oneFeedLink in feedLinks {
|
||||
if let oneURLString = oneFeedLink.urlString?.normalizedURL {
|
||||
orderFound = orderFound + 1
|
||||
let oneFeedSpecifier = FeedSpecifier(title: oneFeedLink.title, urlString: oneURLString, source: .HTMLHead, orderFound: orderFound)
|
||||
addFeedSpecifier(oneFeedSpecifier)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let bodyLinks = RSHTMLLinkParser.htmlLinks(with: parserData)
|
||||
let bodyLinks = HTMLLinkParser.htmlLinks(with: parserData)
|
||||
for oneBodyLink in bodyLinks {
|
||||
if linkMightBeFeed(oneBodyLink), let normalizedURL = oneBodyLink.urlString?.normalizedURL {
|
||||
orderFound = orderFound + 1
|
||||
@ -71,7 +72,7 @@ private extension HTMLFeedFinder {
|
||||
return false
|
||||
}
|
||||
|
||||
func linkMightBeFeed(_ link: RSHTMLLink) -> Bool {
|
||||
func linkMightBeFeed(_ link: HTMLLink) -> Bool {
|
||||
if let linkURLString = link.urlString, urlStringMightBeFeed(linkURLString) {
|
||||
return true
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
import ParserObjC
|
||||
|
||||
public final class FeedbinEntry: Decodable, @unchecked Sendable {
|
||||
|
||||
@ -29,7 +28,7 @@ public final class FeedbinEntry: Decodable, @unchecked Sendable {
|
||||
// and letting the one date fail when parsed.
|
||||
public lazy var parsedDatePublished: Date? = {
|
||||
if let datePublished = datePublished {
|
||||
return RSDateWithString(datePublished)
|
||||
return DateParser.date(string: datePublished)
|
||||
}
|
||||
else {
|
||||
return nil
|
||||
|
@ -0,0 +1,10 @@
|
||||
//
|
||||
// Dictionary+Extensions.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 9/23/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public typealias StringDictionary = [String: String]
|
@ -12,7 +12,7 @@ import Articles
|
||||
import Account
|
||||
import UniformTypeIdentifiers
|
||||
import Core
|
||||
import ParserObjC
|
||||
import Parser
|
||||
|
||||
public extension Notification.Name {
|
||||
static let FaviconDidBecomeAvailable = Notification.Name("FaviconDidBecomeAvailableNotification") // userInfo key: FaviconDownloader.UserInfoKey.faviconURL
|
||||
@ -22,7 +22,7 @@ public protocol FaviconDownloaderDelegate {
|
||||
|
||||
@MainActor var appIconImage: IconImage? { get }
|
||||
|
||||
@MainActor func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata?
|
||||
@MainActor func downloadMetadata(_ url: String) async throws -> HTMLMetadata?
|
||||
}
|
||||
|
||||
@MainActor public final class FaviconDownloader {
|
||||
|
@ -9,7 +9,6 @@
|
||||
import Foundation
|
||||
import CoreServices
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import UniformTypeIdentifiers
|
||||
|
||||
// The favicon URLs may be specified in the head section of the home page.
|
||||
@ -23,7 +22,7 @@ import UniformTypeIdentifiers
|
||||
/// - Parameters:
|
||||
/// - homePageURL: The page to search.
|
||||
/// - urls: An array of favicon URLs as strings.
|
||||
static func findFaviconURLs(with homePageURL: String, downloadMetadata: ((String) async throws -> RSHTMLMetadata?)) async -> [String]? {
|
||||
static func findFaviconURLs(with homePageURL: String, downloadMetadata: ((String) async throws -> HTMLMetadata?)) async -> [String]? {
|
||||
|
||||
guard let _ = URL(string: homePageURL) else {
|
||||
return nil
|
||||
@ -32,14 +31,14 @@ import UniformTypeIdentifiers
|
||||
// If the favicon has an explicit type, check that for an ignored type; otherwise, check the file extension.
|
||||
let htmlMetadata = try? await downloadMetadata(homePageURL)
|
||||
|
||||
let faviconURLs = htmlMetadata?.favicons.compactMap { favicon -> String? in
|
||||
let faviconURLs = htmlMetadata?.favicons?.compactMap { favicon -> String? in
|
||||
shouldAllowFavicon(favicon) ? favicon.urlString : nil
|
||||
}
|
||||
|
||||
return faviconURLs
|
||||
}
|
||||
|
||||
static func shouldAllowFavicon(_ favicon: RSHTMLMetadataFavicon) -> Bool {
|
||||
static func shouldAllowFavicon(_ favicon: HTMLMetadataFavicon) -> Bool {
|
||||
|
||||
// Check mime type.
|
||||
if let mimeType = favicon.type, let utType = UTType(mimeType: mimeType) {
|
||||
|
@ -87,7 +87,7 @@
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// func pullFeaturedImageURL(from metadata: RSHTMLMetadata, articleURL: String) {
|
||||
// func pullFeaturedImageURL(from metadata: HTMLMetadata, articleURL: String) {
|
||||
//
|
||||
// if let url = metadata.bestFeaturedImageURL() {
|
||||
// cacheURL(for: articleURL, url)
|
||||
|
@ -11,7 +11,6 @@ import Articles
|
||||
import Account
|
||||
import Web
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Core
|
||||
|
||||
public extension Notification.Name {
|
||||
@ -23,7 +22,7 @@ public protocol FeedIconDownloaderDelegate: Sendable {
|
||||
|
||||
@MainActor var appIconImage: IconImage? { get }
|
||||
|
||||
func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata?
|
||||
func downloadMetadata(_ url: String) async throws -> HTMLMetadata?
|
||||
}
|
||||
|
||||
@MainActor public final class FeedIconDownloader {
|
||||
@ -218,7 +217,7 @@ private extension FeedIconDownloader {
|
||||
homePageToIconURLCacheDirty = true
|
||||
}
|
||||
|
||||
func findIconURLForHomePageURL(_ homePageURL: String, feed: Feed, downloadMetadata: @escaping (String) async throws -> RSHTMLMetadata?) {
|
||||
func findIconURLForHomePageURL(_ homePageURL: String, feed: Feed, downloadMetadata: @escaping (String) async throws -> HTMLMetadata?) {
|
||||
|
||||
guard !urlsInProgress.contains(homePageURL) else {
|
||||
return
|
||||
@ -237,7 +236,7 @@ private extension FeedIconDownloader {
|
||||
}
|
||||
}
|
||||
|
||||
func pullIconURL(from metadata: RSHTMLMetadata, homePageURL: String, feed: Feed) {
|
||||
func pullIconURL(from metadata: HTMLMetadata, homePageURL: String, feed: Feed) {
|
||||
|
||||
if let url = metadata.bestWebsiteIconURL() {
|
||||
cacheIconURL(for: homePageURL, url)
|
||||
|
67
Modules/Images/Sources/Images/HTMLMetadata+Extension.swift
Normal file
67
Modules/Images/Sources/Images/HTMLMetadata+Extension.swift
Normal file
@ -0,0 +1,67 @@
|
||||
//
|
||||
// HTMLMetadata+Extension.swift
|
||||
// NetNewsWire
|
||||
//
|
||||
// Created by Brent Simmons on 11/26/17.
|
||||
// Copyright © 2017 Ranchero Software. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
|
||||
extension HTMLMetadata {

	/// Returns the URL string of the largest usable Apple touch icon, or `nil`
	/// when the metadata has no Apple touch icons at all.
	///
	/// An icon is considered only when it has a parsed size, both dimensions
	/// are positive, and it is at most twice as wide as it is tall. A candidate
	/// replaces the current best only when it is strictly larger in *both*
	/// width and height. When no icon qualifies, the first icon's URL string
	/// is used as a fallback.
	func largestAppleTouchIcon() -> String? {

		guard let icons = appleTouchIcons, !icons.isEmpty else {
			return nil
		}

		var bestImage: HTMLMetadataAppleTouchIcon? = nil

		for image in icons {

			guard let imageSize = image.size else {
				continue
			}
			// A zero (or negative) dimension makes the aspect-ratio test below
			// meaningless: dividing by a zero height is a division by zero, and
			// a degenerate icon should never win over the first-icon fallback.
			guard imageSize.width > 0, imageSize.height > 0 else {
				continue
			}
			// Skip very wide images (more than 2:1).
			if imageSize.width / imageSize.height > 2 {
				continue
			}

			guard let currentBestImage = bestImage, let bestImageSize = currentBestImage.size else {
				bestImage = image
				continue
			}

			if imageSize.height > bestImageSize.height && imageSize.width > bestImageSize.width {
				bestImage = image
			}
		}

		return bestImage?.urlString ?? icons.first?.urlString
	}

	/// Returns the best URL string to use as a website icon.
	///
	/// Preference order: the largest Apple touch icon, then whatever
	/// `bestFeaturedImageURL()` yields (Open Graph image, then Twitter image).
	func bestWebsiteIconURL() -> String? {

		// TODO: metadata icons — sometimes they’re large enough to use here.

		if let appleTouchIcon = largestAppleTouchIcon() {
			return appleTouchIcon
		}

		// Same Open Graph → Twitter fallback as the featured image.
		return bestFeaturedImageURL()
	}

	/// Returns the best URL string to use as a featured image.
	///
	/// When an Open Graph image is present its `url` is returned as-is (there
	/// is no further fallback in that case); otherwise the Twitter image URL
	/// is returned.
	func bestFeaturedImageURL() -> String? {

		if let openGraphImageURL = openGraphProperties?.image {
			return openGraphImageURL.url
		}

		return twitterProperties?.imageURL
	}
}
|
@ -1,99 +0,0 @@
|
||||
//
|
||||
// RSHTMLMetadata+Extension.swift
|
||||
// NetNewsWire
|
||||
//
|
||||
// Created by Brent Simmons on 11/26/17.
|
||||
// Copyright © 2017 Ranchero Software. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
import ParserObjC
|
||||
|
||||
extension RSHTMLMetadata {

	/// Returns the URL of the largest acceptable Open Graph image, or `nil`
	/// when there are no images, none yields a URL, or the chosen URL is a
	/// known placeholder.
	///
	/// Images more than twice as wide as tall are skipped. A candidate replaces
	/// the current pick only when it is strictly larger in both dimensions.
	/// The secure URL is preferred over the plain URL.
	func largestOpenGraphImageURL() -> String? {

		let candidates = openGraphProperties.images
		if candidates.isEmpty {
			return nil
		}

		var largest: RSHTMLOpenGraphImage? = nil

		for candidate in candidates {
			if candidate.width / candidate.height > 2 {
				continue
			}
			// First acceptable image becomes the initial pick.
			guard let currentLargest = largest else {
				largest = candidate
				continue
			}
			if candidate.height > currentLargest.height && candidate.width > currentLargest.width {
				largest = candidate
			}
		}

		guard let url = largest?.secureURL ?? largest?.url else {
			return nil
		}

		// Bad ones we should ignore.
		let badURLs = Set(["https://s0.wp.com/i/blank.jpg"])
		return badURLs.contains(url) ? nil : url
	}

	/// Returns the URL string of the largest acceptable Apple touch icon, or
	/// `nil` when there are no icons or none passes the aspect-ratio filter.
	///
	/// Icons more than twice as wide as tall are skipped. A candidate replaces
	/// the current pick only when it is strictly larger in both dimensions.
	func largestAppleTouchIcon() -> String? {

		let touchIcons = appleTouchIcons
		if touchIcons.isEmpty {
			return nil
		}

		var largest: RSHTMLMetadataAppleTouchIcon? = nil

		for icon in touchIcons {
			if icon.size.width / icon.size.height > 2 {
				continue
			}
			// First acceptable icon becomes the initial pick.
			guard let currentLargest = largest else {
				largest = icon
				continue
			}
			if icon.size.height > currentLargest.size.height && icon.size.width > currentLargest.size.width {
				largest = icon
			}
		}

		return largest?.urlString
	}

	/// Returns the best website icon URL: the largest Apple touch icon, then
	/// the largest Open Graph image, then the Twitter image URL.
	func bestWebsiteIconURL() -> String? {

		// TODO: metadata icons — sometimes they’re large enough to use here.

		return largestAppleTouchIcon() ?? largestOpenGraphImageURL() ?? twitterProperties.imageURL
	}

	/// Returns the best featured image URL: the largest Open Graph image,
	/// falling back to the Twitter image URL.
	func bestFeaturedImageURL() -> String? {

		return largestOpenGraphImageURL() ?? twitterProperties.imageURL
	}
}
|
@ -8,7 +8,6 @@
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
import ParserObjC
|
||||
import Web
|
||||
|
||||
public struct InitialFeedDownloader {
|
||||
@ -23,8 +22,7 @@ public struct InitialFeedDownloader {
|
||||
return nil
|
||||
}
|
||||
|
||||
let parserData = ParserData(url: url.absoluteString, data: data)
|
||||
guard let parsedFeed = try? await FeedParser.parse(parserData) else {
|
||||
guard let parsedFeed = try? FeedParser.parse(urlString: url.absoluteString, data: data) else {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -29,6 +29,18 @@
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
shouldAutocreateTestPlan = "YES">
|
||||
<Testables>
|
||||
<TestableReference
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "NewsBlurTests"
|
||||
BuildableName = "NewsBlurTests"
|
||||
BlueprintName = "NewsBlurTests"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
buildConfiguration = "Debug"
|
||||
|
@ -13,14 +13,12 @@ let package = Package(
|
||||
dependencies: [
|
||||
.package(path: "../Web"),
|
||||
.package(path: "../Secrets"),
|
||||
.package(path: "../Parser"),
|
||||
],
|
||||
targets: [
|
||||
.target(
|
||||
name: "NewsBlur",
|
||||
dependencies: [
|
||||
"Web",
|
||||
"Parser",
|
||||
"Secrets"
|
||||
],
|
||||
swiftSettings: [
|
||||
|
@ -7,7 +7,6 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
|
||||
public typealias NewsBlurFolder = NewsBlurFeedsResponse.Folder
|
||||
|
||||
|
@ -7,7 +7,6 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
|
||||
public typealias NewsBlurStory = NewsBlurStoriesResponse.Story
|
||||
|
||||
|
@ -7,7 +7,6 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import Parser
|
||||
|
||||
public typealias NewsBlurStoryHash = NewsBlurStoryHashesResponse.StoryHash
|
||||
|
||||
|
@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>classNames</key>
|
||||
<dict>
|
||||
<key>DateParserTests</key>
|
||||
<dict>
|
||||
<key>testPubDateParsingPerformance()</key>
|
||||
<dict>
|
||||
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
|
||||
<dict>
|
||||
<key>baselineAverage</key>
|
||||
<real>0.000131</real>
|
||||
<key>baselineIntegrationDisplayName</key>
|
||||
<string>Local Baseline</string>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>testW3CParsingPerformance()</key>
|
||||
<dict>
|
||||
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
|
||||
<dict>
|
||||
<key>baselineAverage</key>
|
||||
<real>0.000121</real>
|
||||
<key>baselineIntegrationDisplayName</key>
|
||||
<string>Local Baseline</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -0,0 +1,33 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>runDestinationsByUUID</key>
|
||||
<dict>
|
||||
<key>78BB49A7-AEB4-40A1-83DA-EB9C5755E396</key>
|
||||
<dict>
|
||||
<key>localComputer</key>
|
||||
<dict>
|
||||
<key>busSpeedInMHz</key>
|
||||
<integer>0</integer>
|
||||
<key>cpuCount</key>
|
||||
<integer>1</integer>
|
||||
<key>cpuKind</key>
|
||||
<string>Apple M1 Max</string>
|
||||
<key>cpuSpeedInMHz</key>
|
||||
<integer>0</integer>
|
||||
<key>logicalCPUCoresPerPackage</key>
|
||||
<integer>10</integer>
|
||||
<key>modelCode</key>
|
||||
<string>Mac13,1</string>
|
||||
<key>physicalCPUCoresPerPackage</key>
|
||||
<integer>10</integer>
|
||||
<key>platformIdentifier</key>
|
||||
<string>com.apple.platform.macosx</string>
|
||||
</dict>
|
||||
<key>targetArchitecture</key>
|
||||
<string>arm64e</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -0,0 +1,52 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>classNames</key>
|
||||
<dict>
|
||||
<key>FeedParserTypeTests</key>
|
||||
<dict>
|
||||
<key>testFeedTypePerformance()</key>
|
||||
<dict>
|
||||
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
|
||||
<dict>
|
||||
<key>baselineAverage</key>
|
||||
<real>0.000010</real>
|
||||
<key>baselineIntegrationDisplayName</key>
|
||||
<string>Local Baseline</string>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>testFeedTypePerformance2()</key>
|
||||
<dict>
|
||||
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
|
||||
<dict>
|
||||
<key>baselineAverage</key>
|
||||
<real>0.000010</real>
|
||||
<key>baselineIntegrationDisplayName</key>
|
||||
<string>Local Baseline</string>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>testFeedTypePerformance3()</key>
|
||||
<dict>
|
||||
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
|
||||
<dict>
|
||||
<key>baselineAverage</key>
|
||||
<real>0.000499</real>
|
||||
<key>baselineIntegrationDisplayName</key>
|
||||
<string>Local Baseline</string>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>testFeedTypePerformance4()</key>
|
||||
<dict>
|
||||
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
|
||||
<dict>
|
||||
<key>baselineAverage</key>
|
||||
<real>0.000691</real>
|
||||
<key>baselineIntegrationDisplayName</key>
|
||||
<string>Local Baseline</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -0,0 +1,33 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>runDestinationsByUUID</key>
|
||||
<dict>
|
||||
<key>9A7464E0-C633-49A0-871F-1F5206C35DE8</key>
|
||||
<dict>
|
||||
<key>localComputer</key>
|
||||
<dict>
|
||||
<key>busSpeedInMHz</key>
|
||||
<integer>0</integer>
|
||||
<key>cpuCount</key>
|
||||
<integer>1</integer>
|
||||
<key>cpuKind</key>
|
||||
<string>Apple M1 Max</string>
|
||||
<key>cpuSpeedInMHz</key>
|
||||
<integer>0</integer>
|
||||
<key>logicalCPUCoresPerPackage</key>
|
||||
<integer>10</integer>
|
||||
<key>modelCode</key>
|
||||
<string>Mac13,1</string>
|
||||
<key>physicalCPUCoresPerPackage</key>
|
||||
<integer>10</integer>
|
||||
<key>platformIdentifier</key>
|
||||
<string>com.apple.platform.macosx</string>
|
||||
</dict>
|
||||
<key>targetArchitecture</key>
|
||||
<string>arm64e</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>classNames</key>
|
||||
<dict>
|
||||
<key>OPMLTests</key>
|
||||
<dict>
|
||||
<key>testOPMLParsingPerformance()</key>
|
||||
<dict>
|
||||
<key>com.apple.XCTPerformanceMetric_WallClockTime</key>
|
||||
<dict>
|
||||
<key>baselineAverage</key>
|
||||
<real>0.002870</real>
|
||||
<key>baselineIntegrationDisplayName</key>
|
||||
<string>Local Baseline</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -0,0 +1,33 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>runDestinationsByUUID</key>
|
||||
<dict>
|
||||
<key>8F8BFCF6-AACD-45D7-B626-1B58CDE0924D</key>
|
||||
<dict>
|
||||
<key>localComputer</key>
|
||||
<dict>
|
||||
<key>busSpeedInMHz</key>
|
||||
<integer>0</integer>
|
||||
<key>cpuCount</key>
|
||||
<integer>1</integer>
|
||||
<key>cpuKind</key>
|
||||
<string>Apple M1 Max</string>
|
||||
<key>cpuSpeedInMHz</key>
|
||||
<integer>0</integer>
|
||||
<key>logicalCPUCoresPerPackage</key>
|
||||
<integer>10</integer>
|
||||
<key>modelCode</key>
|
||||
<string>Mac13,1</string>
|
||||
<key>physicalCPUCoresPerPackage</key>
|
||||
<integer>10</integer>
|
||||
<key>platformIdentifier</key>
|
||||
<string>com.apple.platform.macosx</string>
|
||||
</dict>
|
||||
<key>targetArchitecture</key>
|
||||
<string>arm64e</string>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -0,0 +1,175 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
LastUpgradeVersion = "1530"
|
||||
version = "1.7">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES"
|
||||
buildArchitectures = "Automatic">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "OPMLParser"
|
||||
BuildableName = "OPMLParser"
|
||||
BlueprintName = "OPMLParser"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "Parser"
|
||||
BuildableName = "Parser"
|
||||
BlueprintName = "Parser"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "SAX"
|
||||
BuildableName = "SAX"
|
||||
BlueprintName = "SAX"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "FeedParser"
|
||||
BuildableName = "FeedParser"
|
||||
BlueprintName = "FeedParser"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "HTMLParser"
|
||||
BuildableName = "HTMLParser"
|
||||
BlueprintName = "HTMLParser"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
buildConfiguration = "Debug"
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
shouldAutocreateTestPlan = "YES">
|
||||
<Testables>
|
||||
<TestableReference
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "OPMLParserTests"
|
||||
BuildableName = "OPMLParserTests"
|
||||
BlueprintName = "OPMLParserTests"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
<TestableReference
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "ParserTests"
|
||||
BuildableName = "ParserTests"
|
||||
BlueprintName = "ParserTests"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
<TestableReference
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "FeedParserTests"
|
||||
BuildableName = "FeedParserTests"
|
||||
BlueprintName = "FeedParserTests"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
<TestableReference
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "DateParserTests"
|
||||
BuildableName = "DateParserTests"
|
||||
BlueprintName = "DateParserTests"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
<TestableReference
|
||||
skipped = "NO">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "HTMLParserTests"
|
||||
BuildableName = "HTMLParserTests"
|
||||
BlueprintName = "HTMLParserTests"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</TestableReference>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
buildConfiguration = "Debug"
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
ignoresPersistentStateOnLaunch = "NO"
|
||||
debugDocumentVersioning = "YES"
|
||||
debugServiceExtension = "internal"
|
||||
allowLocationSimulation = "YES">
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
buildConfiguration = "Release"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
debugDocumentVersioning = "YES">
|
||||
<MacroExpansion>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "OPMLParser"
|
||||
BuildableName = "OPMLParser"
|
||||
BlueprintName = "OPMLParser"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</MacroExpansion>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -1,67 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
LastUpgradeVersion = "1530"
|
||||
version = "1.7">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES"
|
||||
buildArchitectures = "Automatic">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "Parser"
|
||||
BuildableName = "Parser"
|
||||
BlueprintName = "Parser"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
buildConfiguration = "Debug"
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
shouldAutocreateTestPlan = "YES">
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
buildConfiguration = "Debug"
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
ignoresPersistentStateOnLaunch = "NO"
|
||||
debugDocumentVersioning = "YES"
|
||||
debugServiceExtension = "internal"
|
||||
allowLocationSimulation = "YES">
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
buildConfiguration = "Release"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
debugDocumentVersioning = "YES">
|
||||
<MacroExpansion>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "Parser"
|
||||
BuildableName = "Parser"
|
||||
BlueprintName = "Parser"
|
||||
ReferencedContainer = "container:">
|
||||
</BuildableReference>
|
||||
</MacroExpansion>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -11,25 +11,28 @@ let package = Package(
|
||||
.library(
|
||||
name: "Parser",
|
||||
type: .dynamic,
|
||||
targets: ["Parser"]),
|
||||
targets: ["Parser"])
|
||||
],
|
||||
dependencies: [
|
||||
.package(path: "../ParserObjC"),
|
||||
.package(path: "../FoundationExtras"),
|
||||
],
|
||||
targets: [
|
||||
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
|
||||
// Targets can depend on other targets in this package, and on products in packages this package depends on.
|
||||
.target(
|
||||
name: "Parser",
|
||||
dependencies: ["ParserObjC"],
|
||||
dependencies: [
|
||||
"FoundationExtras",
|
||||
],
|
||||
swiftSettings: [
|
||||
.enableExperimentalFeature("StrictConcurrency")
|
||||
]),
|
||||
.testTarget(
|
||||
name: "ParserTests",
|
||||
dependencies: ["Parser", "ParserObjC"],
|
||||
dependencies: [
|
||||
"Parser"
|
||||
],
|
||||
exclude: ["Info.plist"],
|
||||
resources: [.copy("Resources")]),
|
||||
]
|
||||
)
|
||||
|
||||
|
587
Modules/Parser/Sources/Parser/DateParser/DateParser.swift
Normal file
587
Modules/Parser/Sources/Parser/DateParser/DateParser.swift
Normal file
@ -0,0 +1,587 @@
|
||||
//
|
||||
// DateParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/28/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public final class DateParser {
|
||||
|
||||
// MARK: - Public API
|
||||
|
||||
/// Parses a W3C-style or pubDate-style date from the raw bytes of a date string.
///
/// Used for feed parsing, as a fast alternative to the system date-parsing APIs.
///
/// - Parameter data: The bytes of the date string.
/// - Returns: The parsed date, or `nil` when `data` is shorter than 6 bytes,
///   longer than 150 bytes, or the selected parser could not produce a date.
public static func date(data: Data) -> Date? {

	let numberOfBytes = data.count

	// Anything outside this window can't plausibly be a date string.
	guard (6...150).contains(numberOfBytes) else {
		return nil
	}

	return data.withUnsafeBytes { rawBytes -> Date? in
		let buffer = rawBytes.bindMemory(to: UInt8.self)

		// The pubDate parser is used only when the input doesn't look like a W3C date.
		// Everything else, including input neither detector recognizes, goes to the W3C parser.
		let looksLikePubDate = !dateIsW3CDate(buffer, numberOfBytes) && dateIsPubDate(buffer, numberOfBytes)
		if looksLikePubDate {
			return parsePubDate(buffer, numberOfBytes)
		}
		return parseW3CDate(buffer, numberOfBytes)
	}
}
|
||||
|
||||
/// Convenience wrapper: encodes `string` as UTF-8 and parses it with `date(data:)`.
///
/// - Returns: The parsed date, or `nil` if the string can't be encoded or parsed.
public static func date(string: String) -> Date? {

	return string.data(using: .utf8).flatMap { utf8Data in
		date(data: utf8Data)
	}
}
|
||||
|
||||
// The bytes of the date string being parsed, as passed to the private parsing helpers.
private typealias DateBuffer = UnsafeBufferPointer<UInt8>
|
||||
|
||||
// Maps a time zone abbreviation to its offset from GMT in seconds
// (each value is built by timeZoneOffset(hours, minutes); negative hours mean west of GMT).
// Lookups use the abbreviation exactly as written here, so they are case-sensitive.
// See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list
// NOTE(review): some abbreviations (e.g. BST, IST, CST) are used for more than one zone
// in the wild; this table holds a single offset for each. Confirm choices against real feeds.
private static let timeZoneTable: [String: Int] = [

	"GMT": timeZoneOffset(0, 0),
	"UTC": timeZoneOffset(0, 0),
	"PDT": timeZoneOffset(-7, 0),
	"PST": timeZoneOffset(-8, 0),
	"EST": timeZoneOffset(-5, 0),
	"EDT": timeZoneOffset(-4, 0),
	"MDT": timeZoneOffset(-6, 0),
	"MST": timeZoneOffset(-7, 0),
	"CST": timeZoneOffset(-6, 0),
	"CDT": timeZoneOffset(-5, 0),
	"ACT": timeZoneOffset(-8, 0),
	"AFT": timeZoneOffset(4, 30),
	"AMT": timeZoneOffset(4, 0),
	"ART": timeZoneOffset(-3, 0),
	"AST": timeZoneOffset(3, 0),
	"AZT": timeZoneOffset(4, 0),
	"BIT": timeZoneOffset(-12, 0),
	"BDT": timeZoneOffset(8, 0),
	"ACST": timeZoneOffset(9, 30),
	"AEST": timeZoneOffset(10, 0),
	"AKST": timeZoneOffset(-9, 0),
	"AMST": timeZoneOffset(5, 0),
	"AWST": timeZoneOffset(8, 0),
	"AZOST": timeZoneOffset(-1, 0),
	"BIOT": timeZoneOffset(6, 0),
	"BRT": timeZoneOffset(-3, 0),
	"BST": timeZoneOffset(6, 0),
	"BTT": timeZoneOffset(6, 0),
	"CAT": timeZoneOffset(2, 0),
	"CCT": timeZoneOffset(6, 30),
	"CET": timeZoneOffset(1, 0),
	"CEST": timeZoneOffset(2, 0),
	"CHAST": timeZoneOffset(12, 45),
	"ChST": timeZoneOffset(10, 0),
	"CIST": timeZoneOffset(-8, 0),
	"CKT": timeZoneOffset(-10, 0),
	"CLT": timeZoneOffset(-4, 0),
	"CLST": timeZoneOffset(-3, 0),
	"COT": timeZoneOffset(-5, 0),
	"COST": timeZoneOffset(-4, 0),
	"CVT": timeZoneOffset(-1, 0),
	"CXT": timeZoneOffset(7, 0),
	"EAST": timeZoneOffset(-6, 0),
	"EAT": timeZoneOffset(3, 0),
	"ECT": timeZoneOffset(-4, 0),
	"EEST": timeZoneOffset(3, 0),
	"EET": timeZoneOffset(2, 0),
	"FJT": timeZoneOffset(12, 0),
	"FKST": timeZoneOffset(-4, 0),
	"GALT": timeZoneOffset(-6, 0),
	"GET": timeZoneOffset(4, 0),
	"GFT": timeZoneOffset(-3, 0),
	"GILT": timeZoneOffset(7, 0),
	"GIT": timeZoneOffset(-9, 0),
	"GST": timeZoneOffset(-2, 0),
	"GYT": timeZoneOffset(-4, 0),
	"HAST": timeZoneOffset(-10, 0),
	"HKT": timeZoneOffset(8, 0),
	"HMT": timeZoneOffset(5, 0),
	"IRKT": timeZoneOffset(8, 0),
	"IRST": timeZoneOffset(3, 30),
	"IST": timeZoneOffset(2, 0),
	"JST": timeZoneOffset(9, 0),
	"KRAT": timeZoneOffset(7, 0),
	"KST": timeZoneOffset(9, 0),
	"LHST": timeZoneOffset(10, 30),
	"LINT": timeZoneOffset(14, 0),
	"MAGT": timeZoneOffset(11, 0),
	"MIT": timeZoneOffset(-9, 30),
	"MSK": timeZoneOffset(3, 0),
	"MUT": timeZoneOffset(4, 0),
	"NDT": timeZoneOffset(-2, 30),
	"NFT": timeZoneOffset(11, 30),
	"NPT": timeZoneOffset(5, 45),
	"NT": timeZoneOffset(-3, 30),
	"OMST": timeZoneOffset(6, 0),
	"PETT": timeZoneOffset(12, 0),
	"PHOT": timeZoneOffset(13, 0),
	"PKT": timeZoneOffset(5, 0),
	"RET": timeZoneOffset(4, 0),
	"SAMT": timeZoneOffset(4, 0),
	"SAST": timeZoneOffset(2, 0),
	"SBT": timeZoneOffset(11, 0),
	"SCT": timeZoneOffset(4, 0),
	"SLT": timeZoneOffset(5, 30),
	"SST": timeZoneOffset(8, 0),
	"TAHT": timeZoneOffset(-10, 0),
	"THA": timeZoneOffset(7, 0),
	"UYT": timeZoneOffset(-3, 0),
	"UYST": timeZoneOffset(-2, 0),
	"VET": timeZoneOffset(-4, 30),
	"VLAT": timeZoneOffset(10, 0),
	"WAT": timeZoneOffset(1, 0),
	"WET": timeZoneOffset(0, 0),
	"WEST": timeZoneOffset(1, 0),
	"YAKT": timeZoneOffset(9, 0),
	"YEKT": timeZoneOffset(5, 0)
]
|
||||
}
|
||||
|
||||
// MARK: - Private
|
||||
|
||||
private extension DateParser {
|
||||
|
||||
/// ASCII byte values of the characters the date parser compares against.
///
/// Every constant is a `UInt8`. `UInt8(ascii:)` is used instead of
/// `Character(...).asciiValue!` so no force-unwrap is needed.
struct DateCharacter {

	// Whitespace
	static let space = UInt8(ascii: " ")
	static let `return` = UInt8(ascii: "\r")
	static let newline = UInt8(ascii: "\n")
	static let tab = UInt8(ascii: "\t")

	// Punctuation and signs (`hyphen` and `minus` are the same byte)
	static let hyphen = UInt8(ascii: "-")
	static let comma = UInt8(ascii: ",")
	static let dot = UInt8(ascii: ".")
	static let colon = UInt8(ascii: ":")
	static let plus = UInt8(ascii: "+")
	static let minus = UInt8(ascii: "-")

	// Letters used when recognizing month names and the `Z` time zone designator
	static let A = UInt8(ascii: "A")
	static let a = UInt8(ascii: "a")
	static let D = UInt8(ascii: "D")
	static let d = UInt8(ascii: "d")
	static let F = UInt8(ascii: "F")
	static let f = UInt8(ascii: "f")
	static let J = UInt8(ascii: "J")
	static let j = UInt8(ascii: "j")
	static let M = UInt8(ascii: "M")
	static let m = UInt8(ascii: "m")
	static let N = UInt8(ascii: "N")
	static let n = UInt8(ascii: "n")
	static let O = UInt8(ascii: "O")
	static let o = UInt8(ascii: "o")
	static let S = UInt8(ascii: "S")
	static let s = UInt8(ascii: "s")
	static let U = UInt8(ascii: "U")
	static let u = UInt8(ascii: "u")
	static let Y = UInt8(ascii: "Y")
	static let y = UInt8(ascii: "y")
	static let Z = UInt8(ascii: "Z")
	static let z = UInt8(ascii: "z")
}
|
||||
|
||||
/// Calendar months. The raw value is the 1-based month number (January is 1, December is 12).
enum Month: Int {

	case January = 1
	case February = 2
	case March = 3
	case April = 4
	case May = 5
	case June = 6
	case July = 7
	case August = 8
	case September = 9
	case October = 10
	case November = 11
	case December = 12
}
|
||||
|
||||
// MARK: - Standard Formats
|
||||
|
||||
/// Reports whether the bytes look like the start of a W3C date.
///
/// A W3C date is something like `2010-11-17T08:40:07-05:00` (the `T` may be missing).
/// Only the year prefix is inspected: the first non-whitespace byte must begin
/// four digits that are immediately followed by a hyphen.
///
/// - Parameters:
///   - bytes: The bytes of the date string.
///   - numberOfBytes: How many bytes of `bytes` to consider.
/// - Returns: `true` when a `yyyy-` prefix is found; `false` otherwise, including
///   when there are too few bytes to hold one.
private static func dateIsW3CDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Bool {

	// `yyyy-` needs five bytes. Returning early also avoids forming an invalid range
	// (a runtime trap) when fewer than four bytes are supplied.
	let prefixLength = 5
	guard numberOfBytes >= prefixLength else {
		return false
	}

	func isWhitespace(_ ch: UInt8) -> Bool {
		return ch == DateCharacter.space || ch == DateCharacter.`return` || ch == DateCharacter.newline || ch == DateCharacter.tab
	}

	// Only positions that leave room for the whole prefix are candidates.
	let candidateStarts = 0...(numberOfBytes - prefixLength)
	guard let start = candidateStarts.first(where: { !isWhitespace(bytes[$0]) }) else {
		return false
	}

	// The first non-whitespace character must be the beginning of the year, as in `2010-`.
	let yearIsNumeric = (start..<(start + 4)).allSatisfy { isDigit(bytes[$0]) }
	return yearIsNumeric && bytes[start + 4] == DateCharacter.hyphen
}
|
||||
|
||||
/// Reports whether the bytes could be a pubDate-style date: the buffer contains
/// at least one space or comma.
///
/// `numberOfBytes` is accepted for symmetry with the other detectors but isn't used;
/// the whole buffer is scanned.
private static func dateIsPubDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Bool {

	return bytes.contains { ch in
		ch == DateCharacter.space || ch == DateCharacter.comma
	}
}
|
||||
|
||||
/// Parses a W3C-style date such as:
///
///     yyyy-MM-dd'T'HH:mm:ss
///     yyyy-MM-dd'T'HH:mm:sszzz
///     yyyy-MM-dd'T'HH:mm:ss.SSSzzz
///
/// Year, month, and day are required. Hour, minute, and second default to 0 when absent.
/// A fractional-seconds part is read as a decimal fraction and kept to millisecond
/// precision: `.5` is 500 ms, `.25` is 250 ms, and digits beyond the third are ignored.
///
/// - Parameters:
///   - bytes: The bytes of the date string.
///   - numberOfBytes: How many bytes of `bytes` to consider.
/// - Returns: The parsed date, or `nil` if year, month, or day can't be read.
private static func parseW3CDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Date? {

	var finalIndex = 0

	guard let year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex) else {
		return nil
	}
	guard let month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) else {
		return nil
	}
	guard let day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) else {
		return nil
	}
	let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0
	let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0
	let second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0

	var currentIndex = finalIndex + 1
	var milliseconds = 0

	let hasFraction = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.dot)
	if hasFraction {
		currentIndex += 1 // Step over the dot.

		// Read up to three fraction digits by hand. This leaves currentIndex on the byte
		// right after the digits, so the sign of a following offset such as `+05:00`
		// is still there for the time zone parser.
		var fractionDigitCount = 0
		while fractionDigitCount < 3 && currentIndex < numberOfBytes && isDigit(bytes[currentIndex]) {
			milliseconds = (milliseconds * 10) + (Int(bytes[currentIndex]) - 48) // '0' is 48
			fractionDigitCount += 1
			currentIndex += 1
		}

		// Scale short fractions: `.5` means 500 ms and `.25` means 250 ms.
		if fractionDigitCount == 1 {
			milliseconds *= 100
		}
		else if fractionDigitCount == 2 {
			milliseconds *= 10
		}
	}

	// Ignore more than 3 digits of precision.
	while currentIndex < numberOfBytes && isDigit(bytes[currentIndex]) {
		currentIndex += 1
	}

	let timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex)

	return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset)
}
|
||||
|
||||
/// Parses a pubDate-style date: day number, month name, four-digit year, then
/// hours and minutes, with optional seconds and an optional time zone.
///
/// Missing pieces get defaults: day 1, January, and 0 for hour, minute, second,
/// and time zone offset. Only the year is required.
///
/// - Parameters:
///   - bytes: The bytes of the date string.
///   - numberOfBytes: How many bytes of `bytes` to consider.
/// - Returns: The parsed date, or `nil` if no year can be read.
private static func parsePubDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Date? {

	var finalIndex = 0

	let day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex) ?? 1
	let month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex) ?? .January

	guard let year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex) else {
		return nil
	}

	let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0
	let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0

	// Seconds are read only when a colon directly follows the minutes.
	let indexAfterMinutes = finalIndex + 1
	var second = 0
	if indexAfterMinutes < numberOfBytes, bytes[indexAfterMinutes] == DateCharacter.colon {
		second = nextNumericValue(bytes, numberOfBytes, indexAfterMinutes, 2, &finalIndex) ?? 0
	}

	// A time zone is read only when a space directly follows the time.
	let indexAfterTime = finalIndex + 1
	var timeZoneOffset = 0
	if indexAfterTime < numberOfBytes, bytes[indexAfterTime] == DateCharacter.space {
		timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, indexAfterTime)
	}

	return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month.rawValue, day, hour, minute, second, 0, timeZoneOffset)
}
|
||||
|
||||
// MARK: - Date Creation
|
||||
|
||||
/// Builds a `Date` from broken-down calendar fields plus a time zone offset.
///
/// - Parameters:
///   - year: Full year (e.g. 2010).
///   - month: 1-based month number.
///   - day: Day of the month.
///   - hour: Hour.
///   - minute: Minute.
///   - second: Second.
///   - milliseconds: Milliseconds to add to the result.
///   - timeZoneOffset: Offset of the fields' time zone from GMT, in seconds.
///     It is subtracted from the GMT interpretation of the fields.
/// - Returns: The resulting date, or `nil` if neither `timegm` nor the calendar
///   fallback can produce one.
static func dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int, _ timeZoneOffset: Int) -> Date? {

	var timeInfo = tm()
	timeInfo.tm_sec = CInt(second)
	timeInfo.tm_min = CInt(minute)
	timeInfo.tm_hour = CInt(hour)
	timeInfo.tm_mday = CInt(day)
	timeInfo.tm_mon = CInt(month - 1) // It's 1-based coming in
	timeInfo.tm_year = CInt(year - 1900) // see time.h -- it's years since 1900
	timeInfo.tm_wday = -1 // Sentinel: filled in by timegm on success (checked below).
	timeInfo.tm_yday = -1
	timeInfo.tm_isdst = -1
	timeInfo.tm_gmtoff = 0
	timeInfo.tm_zone = nil

	let utcTime = timegm(&timeInfo)

	// timegm reports failure by returning -1, but -1 is also a valid time (one second
	// before the epoch). tm_wday is still -1 only when timegm really failed.
	let timegmFailed = (utcTime == -1) && (timeInfo.tm_wday == -1)
	if timegmFailed {

		// NSCalendar is super-amazingly slow (which is partly why this parser exists),
		// so it is used only when timegm can't represent the date.
		// A fixed Gregorian calendar in GMT is used so the result doesn't depend on
		// the user's calendar or time zone settings.
		var calendar = Calendar(identifier: .gregorian)
		if let gmt = TimeZone(secondsFromGMT: 0) {
			calendar.timeZone = gmt
		}

		var dateComponents = DateComponents()
		dateComponents.year = year
		dateComponents.month = month
		dateComponents.day = day
		dateComponents.hour = hour
		dateComponents.minute = minute
		dateComponents.second = second
		dateComponents.nanosecond = milliseconds * 1000000

		// Apply the offset the same way as the fast path: GMT time minus the offset.
		return calendar.date(from: dateComponents)?.addingTimeInterval(-TimeInterval(timeZoneOffset))
	}

	var timeInterval = TimeInterval(utcTime - timeZoneOffset)
	if milliseconds > 0 {
		timeInterval += TimeInterval(milliseconds) / 1000.0
	}

	return Date(timeIntervalSince1970: timeInterval)
}
|
||||
|
||||
// MARK: - Time Zones and Offsets
|
||||
|
||||
/// Reads a time zone designator starting at `startingIndex` and returns its offset in seconds.
///
/// Up to five letters, digits, `+`, or `-` are collected; colons, spaces, and any other
/// bytes are skipped. Nothing collected, or a leading `Z`/`z`, means 0. If any letter was
/// collected the text is looked up as an abbreviation (unknown abbreviations give 0);
/// otherwise it is treated as a numeric offset such as `-0500`.
private static func parsedTimeZoneOffset(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int) -> Int {

	let maximumCharacters = 5
	// One spare slot so the collected characters always end with a zero byte.
	var collected = [UInt8](repeating: 0, count: maximumCharacters + 1)
	var collectedCount = 0
	var sawLetter = false

	for index in startingIndex..<numberOfBytes {
		let ch = bytes[index]
		guard ch != DateCharacter.colon, ch != DateCharacter.space else {
			continue
		}

		let isLetter = isAlpha(ch)
		sawLetter = sawLetter || isLetter

		let isSign = (ch == DateCharacter.plus) || (ch == DateCharacter.minus)
		if isLetter || isDigit(ch) || isSign {
			collected[collectedCount] = ch
			collectedCount += 1
		}
		if collectedCount >= maximumCharacters {
			break
		}
	}

	guard collectedCount > 0, collected[0] != DateCharacter.Z, collected[0] != DateCharacter.z else {
		return 0
	}

	guard sawLetter else {
		return offsetInSecondsForOffsetCharacters(collected)
	}
	return offsetInSecondsForTimeZoneAbbreviation(collected) ?? 0
}
|
||||
|
||||
/// Converts zero-terminated numeric offset characters such as `+0530` or `-0800`
/// into seconds.
///
/// The first two digits are hours and the next two are minutes. The result is
/// positive only when the first character is `+`; anything else is treated as negative.
private static func offsetInSecondsForOffsetCharacters(_ timeZoneCharacters: [UInt8]) -> Int {

	let sign = (timeZoneCharacters[0] == DateCharacter.plus) ? 1 : -1

	// Length up to (not including) the terminating zero byte.
	let length = timeZoneCharacters.firstIndex(of: 0) ?? timeZoneCharacters.count
	var cursor = 0

	return timeZoneCharacters.withUnsafeBufferPointer { buffer in
		let hours = nextNumericValue(buffer, length, 0, 2, &cursor) ?? 0
		let minutes = nextNumericValue(buffer, length, cursor + 1, 2, &cursor) ?? 0

		let magnitude = (hours * 60 * 60) + (minutes * 60)
		return sign * magnitude
	}
}
|
||||
|
||||
/// Returns the offset in seconds for the given hours and minutes.
///
/// The sign comes from `hours`: when `hours` is negative the minutes are
/// subtracted, so `(-9, 30)` is 9.5 hours behind.
static func timeZoneOffset(_ hours: Int, _ minutes: Int) -> Int {

	let secondsFromHours = hours * 60 * 60
	let secondsFromMinutes = minutes * 60

	return hours < 0 ? (secondsFromHours - secondsFromMinutes) : (secondsFromHours + secondsFromMinutes)
}
|
||||
|
||||
/// Looks up a time zone abbreviation (e.g. `PDT`) and returns its offset in seconds.
///
/// - Parameter abbreviation: The abbreviation's bytes, optionally followed by zero bytes.
/// - Returns: The offset from `timeZoneTable`, or `nil` if the abbreviation isn't listed.
///   The lookup is case-sensitive.
private static func offsetInSecondsForTimeZoneAbbreviation(_ abbreviation: [UInt8]) -> Int? {

	// Decode only the bytes before the first zero byte. This replaces the deprecated
	// array overload of String(cString:) and doesn't rely on a terminator being present.
	let nameBytes = abbreviation.prefix { $0 != 0 }
	let name = String(decoding: nameBytes, as: UTF8.self)
	return timeZoneTable[name]
}
|
||||
|
||||
// MARK: - Parser
|
||||
|
||||
/// Finds the next month name at or after `startingIndex`.
///
/// Deliberately lenient, not strict: months whose first letter is unique
/// (F, S, O, N, D) are recognized from that letter alone. For the others up to
/// three letters are read and compared case-insensitively.
///
/// - Parameters:
///   - bytes: The bytes of the date string.
///   - numberOfBytes: How many bytes of `bytes` to consider.
///   - startingIndex: Where to start scanning.
///   - finalIndex: Set to the index of the last byte examined.
/// - Returns: The month, or `nil` if fewer than two letters were found.
private static func nextMonthValue(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ finalIndex: inout Int) -> DateParser.Month? {

	var letterCount = 0
	var monthCharacters: [CChar] = [0, 0, 0]

	for index in startingIndex..<numberOfBytes {

		finalIndex = index
		let ch = bytes[index]

		guard isAlpha(ch) else {
			if letterCount == 0 {
				continue // Still skipping whatever precedes the month name.
			}
			break // The run of letters has ended.
		}

		if letterCount == 0 {
			// Setting bit 0x20 maps an ASCII uppercase letter to lowercase and leaves
			// lowercase letters unchanged, so one comparison covers both cases.
			switch ch | 0x20 {
			case DateCharacter.f:
				return .February
			case DateCharacter.s:
				return .September
			case DateCharacter.o:
				return .October
			case DateCharacter.n:
				return .November
			case DateCharacter.d:
				return .December
			default:
				break
			}
		}

		monthCharacters[letterCount] = CChar(ch)
		letterCount += 1
		if letterCount >= 3 {
			break
		}
	}

	guard letterCount >= 2 else {
		return nil
	}

	// Lowercased view of a collected letter. An unfilled slot (0) becomes 0x20,
	// which matches none of the letters tested below.
	func folded(_ position: Int) -> UInt8 {
		return UInt8(bitPattern: monthCharacters[position]) | 0x20
	}
	let first = folded(0)
	let second = folded(1)
	let third = folded(2)

	switch first {

	case DateCharacter.j: // Jan, Jun, Jul
		guard second == DateCharacter.u else {
			return .January
		}
		return third == DateCharacter.n ? .June : .July

	case DateCharacter.m: // March, May
		return third == DateCharacter.y ? .May : .March

	case DateCharacter.a: // April, August
		return second == DateCharacter.u ? .August : .April

	default:
		return .January // Should never get here (but possibly do)
	}
}
|
||||
|
||||
/// Reads the next run of decimal digits at or after `startingIndex`.
///
/// Non-digit bytes before the first digit are skipped. Reading stops at the first
/// non-digit after the run has begun, or once `maximumNumberOfDigits` digits were read.
///
/// - Parameters:
///   - bytes: The bytes to scan.
///   - numberOfBytes: How many bytes of `bytes` to consider.
///   - startingIndex: Where to start scanning.
///   - maximumNumberOfDigits: Upper bound on digits to read; must be 1 through 4.
///   - finalIndex: Set to the index of the last byte examined. That is the last digit
///     when the maximum was reached, otherwise the byte that ended the scan.
/// - Returns: The numeric value, or `nil` if no digit was found.
private static func nextNumericValue(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ maximumNumberOfDigits: Int, _ finalIndex: inout Int) -> Int? {

	// Maximum for the maximum is 4 (for time zone offsets and years)
	assert(maximumNumberOfDigits > 0 && maximumNumberOfDigits <= 4)

	var digitCount = 0
	var value = 0

	for index in startingIndex..<numberOfBytes {

		finalIndex = index
		let byte = bytes[index]

		guard isDigit(byte) else {
			if digitCount == 0 {
				continue // Haven't reached the number yet.
			}
			break // The number has ended.
		}

		value = (value * 10) + (Int(byte) - 48) // '0' is 48
		digitCount += 1
		if digitCount >= maximumNumberOfDigits {
			break
		}
	}

	return digitCount > 0 ? value : nil
}
|
||||
|
||||
/// Returns `true` when `ch` is the character code of an ASCII decimal digit, `0` through `9`.
static func isDigit<T: BinaryInteger>(_ ch: T) -> Bool {

	// '0' is 48 and '9' is 57.
	return ch >= 48 && ch <= 57
}
|
||||
|
||||
/// Returns `true` when `ch` is the character code of an ASCII letter, `A`-`Z` or `a`-`z`.
///
/// The check is a plain range comparison, not the C library's `isalpha`, whose answer
/// for bytes of 0x80 and above depends on the process locale. Date strings are parsed
/// byte by byte, and callers convert accepted bytes to `CChar`, so only 7-bit letters
/// may be reported as alphabetic.
static func isAlpha<T: BinaryInteger>(_ ch: T) -> Bool {

	let isUppercase = ch >= 65 && ch <= 90 // 'A'...'Z'
	let isLowercase = ch >= 97 && ch <= 122 // 'a'...'z'
	return isUppercase || isLowercase
}
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
//
|
||||
// FeedParser.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
|
||||
// You don’t need to know the type of feed.
|
||||
|
||||
/// Entry point for feed parsing. Detects the feed type from the data and
/// dispatches to the matching parser, so callers don't need to know the type.
public struct FeedParser {

	/// Returns `true` when `data` is detected as a JSON Feed, RSS-in-JSON, RSS, or Atom feed.
	public static func canParse(_ data: Data) -> Bool {

		switch FeedType.feedType(data) {
		case .jsonFeed, .rssInJSON, .rss, .atom:
			return true
		case .unknown, .notAFeed:
			return false
		}
	}

	/// Parses `data` with the parser that matches its detected feed type.
	///
	/// - Parameters:
	///   - urlString: The feed's URL, passed through to the underlying parser.
	///   - data: The feed's bytes.
	/// - Returns: The parsed feed, or `nil` when the type is unknown or the data is not a feed.
	/// - Throws: Whatever the JSON Feed or RSS-in-JSON parser throws.
	public static func parse(urlString: String, data: Data) throws -> ParsedFeed? {

		switch FeedType.feedType(data) {

		case .unknown, .notAFeed:
			return nil

		case .jsonFeed:
			return try JSONFeedParser.parse(urlString: urlString, data: data)

		case .rssInJSON:
			return try RSSInJSONParser.parse(urlString: urlString, data: data)

		case .rss:
			let rssFeed = RSSParser.parsedFeed(urlString: urlString, data: data)
			return RSSFeedTransformer.parsedFeed(with: rssFeed, feedType: .rss)

		case .atom:
			let atomFeed = AtomParser.parsedFeed(urlString: urlString, data: data)
			return RSSFeedTransformer.parsedFeed(with: atomFeed, feedType: .atom)
		}
	}

	/// Async-callable form of `parse(urlString:data:)`; it simply calls the synchronous parser.
	public static func parseAsync(urlString: String, data: Data) async throws -> ParsedFeed? {

		return try parse(urlString: urlString, data: data)
	}
}
|
148
Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift
Normal file
148
Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift
Normal file
@ -0,0 +1,148 @@
|
||||
//
|
||||
// FeedType.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// The kind of feed a chunk of data appears to contain.
public enum FeedType: Sendable {

	case rss, atom, jsonFeed, rssInJSON
	/// Not enough information yet; ask again when there's more data.
	case unknown
	/// Definitely not a feed.
	case notAFeed

	private static let minNumberOfBytesRequired = 128

	/// Detects the feed type of `data`.
	///
	/// Can be called with partial data, while still downloading for instance.
	///
	/// - Parameters:
	///   - data: The bytes received so far.
	///   - isPartialData: Pass `true` when more data is still expected.
	/// - Returns: `.unknown` when there are fewer than 128 bytes, or when the data
	///   looks like JSON but is only partial; `.notAFeed` when nothing matched;
	///   otherwise the detected type.
	static func feedType(_ data: Data, isPartialData: Bool = false) -> FeedType {

		let count = data.count
		guard count >= minNumberOfBytesRequired else {
			return .unknown
		}

		return data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) -> FeedType in

			guard let baseAddress = pointer.baseAddress else {
				return .unknown
			}
			let cCharPointer = baseAddress.assumingMemoryBound(to: CChar.self)

			if isProbablyJSON(cCharPointer, count) {

				// A JSON Feed may legally put its version element at the very end,
				// so it can't always be recognized before all the data has arrived.
				// Answer .unknown for partial JSON instead of .notAFeed.
				guard !isPartialData else {
					return .unknown
				}

				if isProbablyJSONFeed(cCharPointer, count) {
					return .jsonFeed
				}
				if isProbablyRSSInJSON(cCharPointer, count) {
					return .rssInJSON
				}
			}

			// JSON that is neither kind of JSON feed falls through to the XML checks.
			if isProbablyRSS(cCharPointer, count) {
				return .rss
			}
			if isProbablyAtom(cCharPointer, count) {
				return .atom
			}

			return .notAFeed
		}
	}
}
|
||||
|
||||
private extension FeedType {
|
||||
|
||||
static func isProbablyRSS(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
|
||||
|
||||
if didFindString("<rss", bytes, count) || didFindString("<rdf:RDF", bytes, count) {
|
||||
return true
|
||||
}
|
||||
|
||||
return didFindString("<channel>", bytes, count) && didFindString("<pubDate>", bytes, count)
|
||||
}
|
||||
|
||||
static func isProbablyAtom(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
|
||||
|
||||
didFindString("<feed", bytes, count)
|
||||
}
|
||||
|
||||
static func isProbablyJSON(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
|
||||
|
||||
bytesStartWithStringIgnoringWhitespace("{", bytes, count)
|
||||
}
|
||||
|
||||
static func isProbablyJSONFeed(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
|
||||
|
||||
// Assumes already called `isProbablyJSON` and it returned true.
|
||||
didFindString("://jsonfeed.org/version/", bytes, count) || didFindString(":\\/\\/jsonfeed.org\\/version\\/", bytes, count)
|
||||
}
|
||||
|
||||
static func isProbablyRSSInJSON(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {
|
||||
|
||||
// Assumes already called `isProbablyJSON` and it returned true.
|
||||
didFindString("rss", bytes, count) && didFindString("channel", bytes, count) && didFindString("item", bytes, count)
|
||||
}
|
||||
|
||||
static func didFindString(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {
|
||||
|
||||
let foundString = strnstr(bytes, string, numberOfBytes)
|
||||
return foundString != nil
|
||||
}
|
||||
|
||||
struct Whitespace {
|
||||
static let space = Character(" ").asciiValue!
|
||||
static let `return` = Character("\r").asciiValue!
|
||||
static let newline = Character("\n").asciiValue!
|
||||
static let tab = Character("\t").asciiValue!
|
||||
}
|
||||
|
||||
static func bytesStartWithStringIgnoringWhitespace(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {
|
||||
|
||||
var i = 0
|
||||
|
||||
while i < numberOfBytes {
|
||||
|
||||
let ch = bytes[i]
|
||||
|
||||
if ch == Whitespace.space || ch == Whitespace.return || ch == Whitespace.newline || ch == Whitespace.tab {
|
||||
i += 1
|
||||
continue
|
||||
}
|
||||
|
||||
if ch == string[0] {
|
||||
if let found = strnstr(bytes, string, numberOfBytes) {
|
||||
return found == bytes + i
|
||||
}
|
||||
}
|
||||
|
||||
// Allow for a BOM of up to four bytes (assuming BOM is only at the start)
|
||||
if i < 4 {
|
||||
i += 1
|
||||
continue
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
}
|
@ -7,9 +7,6 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
#if SWIFT_PACKAGE
|
||||
import ParserObjC
|
||||
#endif
|
||||
|
||||
// See https://jsonfeed.org/version/1.1
|
||||
|
||||
@ -52,9 +49,9 @@ public struct JSONFeedParser {
|
||||
|
||||
static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct.
|
||||
|
||||
public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
|
||||
public static func parse(urlString: String, data: Data) throws -> ParsedFeed? {
|
||||
|
||||
guard let d = JSONUtilities.dictionary(with: parserData.data) else {
|
||||
guard let d = JSONUtilities.dictionary(with: data) else {
|
||||
throw FeedParserError(.invalidJSON)
|
||||
}
|
||||
|
||||
@ -70,7 +67,7 @@ public struct JSONFeedParser {
|
||||
|
||||
let authors = parseAuthors(d)
|
||||
let homePageURL = d[Key.homePageURL] as? String
|
||||
let feedURL = d[Key.feedURL] as? String ?? parserData.url
|
||||
let feedURL = d[Key.feedURL] as? String ?? urlString
|
||||
let feedDescription = d[Key.feedDescription] as? String
|
||||
let nextURL = d[Key.nextURL] as? String
|
||||
let iconURL = d[Key.icon] as? String
|
||||
@ -79,7 +76,7 @@ public struct JSONFeedParser {
|
||||
let hubs = parseHubs(d)
|
||||
let language = d[Key.language] as? String
|
||||
|
||||
let items = parseItems(itemsArray, parserData.url)
|
||||
let items = parseItems(itemsArray, urlString)
|
||||
|
||||
return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items)
|
||||
}
|
||||
@ -179,7 +176,7 @@ private extension JSONFeedParser {
|
||||
}
|
||||
|
||||
if isSpecialCaseTitleWithEntitiesFeed(feedURL) {
|
||||
return (title as NSString).rsparser_stringByDecodingHTMLEntities()
|
||||
return HTMLEntityDecoder.decodedString(title)
|
||||
}
|
||||
|
||||
return title
|
||||
@ -221,7 +218,7 @@ private extension JSONFeedParser {
|
||||
guard let dateString = dateString, !dateString.isEmpty else {
|
||||
return nil
|
||||
}
|
||||
return RSDateWithString(dateString)
|
||||
return DateParser.date(string: dateString)
|
||||
}
|
||||
|
||||
static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {
|
@ -7,19 +7,16 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
#if SWIFT_PACKAGE
|
||||
import ParserObjC
|
||||
#endif
|
||||
|
||||
// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
|
||||
// Also: http://cyber.harvard.edu/rss/rss.html
|
||||
|
||||
public struct RSSInJSONParser {
|
||||
|
||||
public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
|
||||
public static func parse(urlString: String, data: Data) throws -> ParsedFeed? {
|
||||
|
||||
do {
|
||||
guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else {
|
||||
guard let parsedObject = try JSONSerialization.jsonObject(with: data) as? JSONDictionary else {
|
||||
throw FeedParserError(.invalidJSON)
|
||||
}
|
||||
guard let rssObject = parsedObject["rss"] as? JSONDictionary else {
|
||||
@ -47,11 +44,11 @@ public struct RSSInJSONParser {
|
||||
|
||||
let title = channelObject["title"] as? String
|
||||
let homePageURL = channelObject["link"] as? String
|
||||
let feedURL = parserData.url
|
||||
let feedURL = urlString
|
||||
let feedDescription = channelObject["description"] as? String
|
||||
let feedLanguage = channelObject["language"] as? String
|
||||
|
||||
let items = parseItems(itemsObject!, parserData.url)
|
||||
let items = parseItems(itemsObject!, urlString)
|
||||
|
||||
return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
|
||||
|
||||
@ -87,7 +84,7 @@ private extension RSSInJSONParser {
|
||||
|
||||
var datePublished: Date? = nil
|
||||
if let datePublishedString = itemDictionary["pubDate"] as? String {
|
||||
datePublished = RSDateWithString(datePublishedString)
|
||||
datePublished = DateParser.date(string: datePublishedString)
|
||||
}
|
||||
|
||||
let authors = parseAuthors(itemDictionary)
|
||||
@ -127,7 +124,7 @@ private extension RSSInJSONParser {
|
||||
s = contentText!
|
||||
}
|
||||
}
|
||||
uniqueID = (s as NSString).rsparser_md5Hash()
|
||||
uniqueID = s.md5String
|
||||
}
|
||||
|
||||
if let uniqueID = uniqueID {
|
@ -8,7 +8,7 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
public struct ParsedAttachment: Hashable, Sendable {
|
||||
public final class ParsedAttachment: Hashable, Sendable {
|
||||
|
||||
public let url: String
|
||||
public let mimeType: String?
|
||||
@ -33,4 +33,10 @@ public struct ParsedAttachment: Hashable, Sendable {
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(url)
|
||||
}
|
||||
|
||||
// MARK: - Equatable
|
||||
|
||||
/// Two attachments are equal when url, mimeType, title, sizeInBytes, and durationInSeconds all match.
public static func ==(lhs: ParsedAttachment, rhs: ParsedAttachment) -> Bool {
	guard lhs.url == rhs.url, lhs.mimeType == rhs.mimeType, lhs.title == rhs.title else {
		return false
	}
	return lhs.sizeInBytes == rhs.sizeInBytes && lhs.durationInSeconds == rhs.durationInSeconds
}
|
||||
}
|
@ -8,7 +8,7 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
public struct ParsedAuthor: Hashable, Codable, Sendable {
|
||||
public final class ParsedAuthor: Hashable, Codable, Sendable {
|
||||
|
||||
public let name: String?
|
||||
public let url: String?
|
||||
@ -22,6 +22,18 @@ public struct ParsedAuthor: Hashable, Codable, Sendable {
|
||||
self.emailAddress = emailAddress
|
||||
}
|
||||
|
||||
/// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.)
///
/// A string containing "@" is treated as an email address; otherwise one whose
/// lowercased form starts with "http" is treated as a URL; anything else is a name.
convenience init(singleString: String) {

	let isEmailAddress = singleString.contains("@")
	let isURL = !isEmailAddress && singleString.lowercased().hasPrefix("http")
	let isName = !isEmailAddress && !isURL

	self.init(
		name: isName ? singleString : nil,
		url: isURL ? singleString : nil,
		avatarURL: nil,
		emailAddress: isEmailAddress ? singleString : nil
	)
}
|
||||
|
||||
// MARK: - Hashable
|
||||
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
@ -41,4 +53,11 @@ public struct ParsedAuthor: Hashable, Codable, Sendable {
|
||||
hasher.combine("")
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Equatable
|
||||
|
||||
/// Two authors are equal when name, url, avatarURL, and emailAddress all match.
public static func ==(lhs: ParsedAuthor, rhs: ParsedAuthor) -> Bool {

	guard lhs.name == rhs.name, lhs.url == rhs.url else {
		return false
	}
	return lhs.avatarURL == rhs.avatarURL && lhs.emailAddress == rhs.emailAddress
}
|
||||
}
|
@ -8,7 +8,7 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
public struct ParsedFeed: Sendable {
|
||||
public final class ParsedFeed: Sendable {
|
||||
|
||||
public let type: FeedType
|
||||
public let title: String?
|
@ -0,0 +1,33 @@
|
||||
//
|
||||
// ParsedHub.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A hub entry of a parsed feed: a `type` string paired with a `url` string.
public final class ParsedHub: Hashable, Sendable {

	public let type: String
	public let url: String

	init(type: String, url: String) {
		self.url = url
		self.type = type
	}

	// MARK: - Hashable

	/// Feeds `type`, then `url`, into the hasher.
	public func hash(into hasher: inout Hasher) {
		hasher.combine(type)
		hasher.combine(url)
	}

	// MARK: - Equatable

	/// Hubs are equal when both `type` and `url` match.
	public static func ==(lhs: ParsedHub, rhs: ParsedHub) -> Bool {
		guard lhs.type == rhs.type else {
			return false
		}
		return lhs.url == rhs.url
	}
}
|
@ -8,7 +8,7 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
public struct ParsedItem: Hashable, Sendable {
|
||||
public final class ParsedItem: Hashable, Sendable {
|
||||
|
||||
public let syncServiceID: String? //Nil when not syncing
|
||||
public let uniqueID: String //RSS guid, for instance; may be calculated
|
||||
@ -63,5 +63,10 @@ public struct ParsedItem: Hashable, Sendable {
|
||||
hasher.combine(feedURL)
|
||||
}
|
||||
}
|
||||
|
||||
/// Two items are equal only when every compared property matches.
public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool {

	// Identity.
	guard lhs.syncServiceID == rhs.syncServiceID, lhs.uniqueID == rhs.uniqueID, lhs.feedURL == rhs.feedURL else {
		return false
	}
	// Links, title, language.
	guard lhs.url == rhs.url, lhs.externalURL == rhs.externalURL, lhs.title == rhs.title, lhs.language == rhs.language else {
		return false
	}
	// Content.
	guard lhs.contentHTML == rhs.contentHTML, lhs.contentText == rhs.contentText, lhs.summary == rhs.summary else {
		return false
	}
	// Images and dates.
	guard lhs.imageURL == rhs.imageURL, lhs.bannerImageURL == rhs.bannerImageURL, lhs.datePublished == rhs.datePublished, lhs.dateModified == rhs.dateModified else {
		return false
	}
	// Collections.
	return lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments
}
|
||||
}
|
||||
|
@ -0,0 +1,444 @@
|
||||
//
|
||||
// AtomParser.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
|
||||
/// SAX-driven parser that builds an internal `RSSFeed` from Atom data.
///
/// Use `parsedFeed(urlString:data:)`; the instance only holds parsing state.
final class AtomParser {

	// Input and result.
	private var feedURL: String
	private let data: Data
	private let feed: RSSFeed

	// Articles collected so far; the most recent one is the one being filled in.
	private var articles: [RSSArticle] = []
	private var currentArticle: RSSArticle? {
		return articles.last
	}

	// One attributes dictionary per started element; the last entry is the current one.
	private var attributesStack: [StringDictionary] = []
	private var currentAttributes: StringDictionary? {
		return attributesStack.last
	}

	// XHTML content accumulation.
	private var parsingXHTML = false
	private var xhtmlString: String?

	// Author accumulation.
	private var currentAuthor: RSSAuthor?
	private var parsingAuthor = false

	// Position flags.
	private var parsingArticle = false
	private var parsingSource = false
	private var endFeedFound = false

	/// Parses `data` and returns the resulting feed.
	static func parsedFeed(urlString: String, data: Data) -> RSSFeed {

		let atomParser = AtomParser(urlString: urlString, data: data)
		atomParser.parse()
		return atomParser.feed
	}

	init(urlString: String, data: Data) {
		self.data = data
		self.feedURL = urlString
		self.feed = RSSFeed(urlString: urlString)
	}
}
|
||||
|
||||
private extension AtomParser {

	/// Runs a `SAXParser` over `data` with this object as delegate, then stores
	/// the collected articles on `feed`.
	func parse() {

		let saxParser = SAXParser(delegate: self, data: data)
		saxParser.parse()
		feed.articles = articles
	}

	/// Element names as C strings, for comparison with `SAXEqualTags`.
	private struct XMLName {
		static let entry = "entry".utf8CString
		static let content = "content".utf8CString
		static let summary = "summary".utf8CString
		static let link = "link".utf8CString
		static let feed = "feed".utf8CString
		static let source = "source".utf8CString
		static let author = "author".utf8CString
		static let name = "name".utf8CString
		static let email = "email".utf8CString
		static let uri = "uri".utf8CString
		static let title = "title".utf8CString
		static let id = "id".utf8CString
		static let published = "published".utf8CString
		static let updated = "updated".utf8CString
		static let issued = "issued".utf8CString
		static let modified = "modified".utf8CString
	}

	/// Attribute names and values as Swift strings, for attribute dictionary lookups.
	private struct XMLString {
		static let rel = "rel"
		static let alternate = "alternate"
		static let related = "related"
		static let enclosure = "enclosure"
		static let href = "href"
		static let title = "title"
		static let type = "type"
		static let length = "length"
		static let xmlLang = "xml:lang"
	}

	/// The parser's stored characters as a whitespace-trimmed string.
	func currentString(_ saxParser: SAXParser) -> String? {

		saxParser.currentStringWithTrimmedWhitespace
	}

	/// The parser's stored characters parsed as a date, or `nil` if there are none
	/// or `DateParser` returns `nil`.
	func currentDate(_ saxParser: SAXParser) -> Date? {

		guard let data = saxParser.currentCharacters else {
			assertionFailure("Unexpected nil saxParser.currentCharacters in AtomParser.currentDate")
			return nil
		}

		return DateParser.date(data: data)
	}

	/// Sets the feed title from the current string; the first non-empty title wins.
	func addFeedTitle(_ saxParser: SAXParser) {

		guard feed.title == nil else {
			return
		}

		if let title = currentString(saxParser), !title.isEmpty {
			feed.title = title
		}
	}

	/// Sets the feed link from the current element's `href` when `rel` is "alternate"
	/// and no link has been set yet.
	func addFeedLink() {

		guard feed.link == nil, let currentAttributes else {
			return
		}

		if let related = currentAttributes[XMLString.rel], related == XMLString.alternate {
			feed.link = currentAttributes[XMLString.href]
		}
	}

	/// Sets the feed language from the current element's `xml:lang`, if not already set.
	func addFeedLanguage() {

		guard feed.language == nil, let currentAttributes else {
			return
		}

		feed.language = currentAttributes[XMLString.xmlLang]
	}

	/// Starts a new article; it becomes `currentArticle`.
	func addArticle() {
		let article = RSSArticle(feedURL)
		articles.append(article)
	}

	/// Applies the just-ended element to the current article.
	/// Elements with a namespace prefix are ignored.
	func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {

		guard prefix == nil else {
			return
		}
		guard let currentArticle else {
			assertionFailure("currentArticle must not be nil in AtomParser.addArticleElement")
			return
		}

		if SAXEqualTags(localName, XMLName.id) {
			currentArticle.guid = currentString(saxParser)
		}

		else if SAXEqualTags(localName, XMLName.title) {
			currentArticle.title = currentString(saxParser)
		}

		else if SAXEqualTags(localName, XMLName.content) {
			addContent(saxParser, currentArticle)
		}

		else if SAXEqualTags(localName, XMLName.summary) {
			addSummary(saxParser, currentArticle)
		}

		else if SAXEqualTags(localName, XMLName.link) {
			addLink(currentArticle)
		}

		else if SAXEqualTags(localName, XMLName.published) {
			currentArticle.datePublished = currentDate(saxParser)
		}

		else if SAXEqualTags(localName, XMLName.updated) {
			currentArticle.dateModified = currentDate(saxParser)
		}

		// Atom 0.3 dates: only used when the Atom 1.0 equivalent has not been seen.
		else if SAXEqualTags(localName, XMLName.issued) {
			if currentArticle.datePublished == nil {
				currentArticle.datePublished = currentDate(saxParser)
			}
		}
		else if SAXEqualTags(localName, XMLName.modified) {
			if currentArticle.dateModified == nil {
				currentArticle.dateModified = currentDate(saxParser)
			}
		}
	}

	/// Uses the current string as the article body.
	func addContent(_ saxParser: SAXParser, _ article: RSSArticle) {

		article.body = currentString(saxParser)
	}

	/// Uses the current string as the article body only when no body is set yet.
	func addSummary(_ saxParser: SAXParser, _ article: RSSArticle) {

		guard article.body == nil else {
			return
		}
		article.body = currentString(saxParser)
	}

	/// Applies the current `link` element to `article` according to its `rel`:
	/// "related" sets `link`, "alternate" (also the default when `rel` is missing
	/// or empty) sets `permalink`, and "enclosure" adds an enclosure.
	func addLink(_ article: RSSArticle) {

		guard let attributes = currentAttributes else {
			return
		}
		guard let urlString = attributes[XMLString.href], !urlString.isEmpty else {
			return
		}

		var rel = attributes[XMLString.rel]
		if rel?.isEmpty ?? true {
			rel = XMLString.alternate
		}

		if rel == XMLString.related {
			if article.link == nil {
				article.link = urlString
			}
		}
		else if rel == XMLString.alternate {
			if article.permalink == nil {
				article.permalink = urlString
			}
		}
		else if rel == XMLString.enclosure {
			if let enclosure = enclosure(urlString, attributes) {
				article.addEnclosure(enclosure)
			}
		}
	}

	/// Builds an enclosure for `urlString` from the `title`, `type`, and `length` attributes.
	func enclosure(_ urlString: String, _ attributes: StringDictionary) -> RSSEnclosure? {

		let enclosure = RSSEnclosure(url: urlString)
		enclosure.title = attributes[XMLString.title]
		enclosure.mimeType = attributes[XMLString.type]

		if let lengthString = attributes[XMLString.length] {
			enclosure.length = Int(lengthString)
		}

		return enclosure
	}

	/// Appends an opening tag for `localName`, with the current element's
	/// attributes, to the XHTML string being accumulated.
	func addXHTMLTag(_ localName: XMLPointer) {

		guard xhtmlString != nil else {
			assertionFailure("xhtmlString must not be nil when in addXHTMLTag.")
			return
		}

		guard let name = String(xmlPointer: localName) else {
			assertionFailure("Unexpected failure converting XMLPointer to String in addXHTMLTag.")
			return
		}

		var tag = "<"
		tag.append(name)

		if let currentAttributes, !currentAttributes.isEmpty {
			for (key, value) in currentAttributes {
				tag.append(" ")
				tag.append(key)
				tag.append("=\"")

				// Attribute values are double-quoted, so embedded quotes must be encoded.
				let encodedValue = value.replacingOccurrences(of: "\"", with: "&quot;")
				tag.append(encodedValue)
				tag.append("\"")
			}
		}

		tag.append(">")

		// Append to the stored property. Binding it with `guard var` would only
		// mutate a local copy and the tag would be lost.
		xhtmlString?.append(tag)
	}
}
|
||||
|
||||
extension AtomParser: SAXParserDelegate {

	/// Handles an element start: pushes its attributes, updates parsing state,
	/// and (for ordinary elements) starts storing characters.
	public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {

		if endFeedFound {
			return
		}

		let xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) ?? StringDictionary()
		attributesStack.append(xmlAttributes)

		// Inside XHTML content every element is copied through as markup.
		if parsingXHTML {
			addXHTMLTag(localName)
			return
		}

		if SAXEqualTags(localName, XMLName.entry) {
			parsingArticle = true
			addArticle()
			return
		}

		if SAXEqualTags(localName, XMLName.author) {
			parsingAuthor = true
			currentAuthor = RSSAuthor()
			return
		}

		if SAXEqualTags(localName, XMLName.source) {
			parsingSource = true
			return
		}

		let isContentTag = SAXEqualTags(localName, XMLName.content)
		let isSummaryTag = SAXEqualTags(localName, XMLName.summary)

		if parsingArticle && (isContentTag || isSummaryTag) {

			if isContentTag {
				currentArticle?.language = xmlAttributes[XMLString.xmlLang]
			}

			// type="xhtml" switches to XHTML accumulation until the element ends.
			if xmlAttributes[XMLString.type] == "xhtml" {
				parsingXHTML = true
				xhtmlString = ""
				return
			}
		}

		if !parsingArticle && SAXEqualTags(localName, XMLName.link) {
			addFeedLink()
			return
		}

		if SAXEqualTags(localName, XMLName.feed) {
			addFeedLanguage()
		}

		saxParser.beginStoringCharacters()
	}

	/// Handles an element end: finishes XHTML content, authors, and entries, or
	/// applies the element to the current article or feed, then pops its attributes.
	public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {

		if SAXEqualTags(localName, XMLName.feed) {
			endFeedFound = true
			return
		}

		if endFeedFound {
			return
		}

		if parsingXHTML {

			let isContentTag = SAXEqualTags(localName, XMLName.content)
			let isSummaryTag = SAXEqualTags(localName, XMLName.summary)

			if parsingArticle && (isContentTag || isSummaryTag) {

				if isContentTag {
					currentArticle?.body = xhtmlString
				}

				// A summary only becomes the body when there is no body yet.
				else if currentArticle?.body?.isEmpty ?? true {
					currentArticle?.body = xhtmlString
				}
			}

			if isContentTag || isSummaryTag {
				parsingXHTML = false
			}

			if xhtmlString != nil {
				if let localNameString = String(xmlPointer: localName) {
					// Append to the stored property. Binding it with `if var`
					// would only mutate a local copy and the closing tag would be lost.
					xhtmlString?.append("</")
					xhtmlString?.append(localNameString)
					xhtmlString?.append(">")
				}
			} else {
				assertionFailure("xhtmlString must not be nil when parsingXHTML in xmlEndElement.")
			}
		}

		else if parsingAuthor {

			if SAXEqualTags(localName, XMLName.author) {
				parsingAuthor = false
				if let currentAuthor, !currentAuthor.isEmpty() {
					currentArticle?.addAuthor(currentAuthor)
				}
				currentAuthor = nil
			}
			else if SAXEqualTags(localName, XMLName.name) {
				currentAuthor?.name = saxParser.currentStringWithTrimmedWhitespace
			}
			else if SAXEqualTags(localName, XMLName.email) {
				currentAuthor?.emailAddress = saxParser.currentStringWithTrimmedWhitespace
			}
			else if SAXEqualTags(localName, XMLName.uri) {
				currentAuthor?.url = saxParser.currentStringWithTrimmedWhitespace
			}
		}

		else if SAXEqualTags(localName, XMLName.entry) {
			parsingArticle = false
		}

		else if parsingArticle && !parsingSource {
			addArticleElement(saxParser, localName, prefix)
		}

		else if SAXEqualTags(localName, XMLName.source) {
			parsingSource = false
		}

		else if !parsingArticle && !parsingSource && SAXEqualTags(localName, XMLName.title) {
			addFeedTitle(saxParser)
		}

		_ = attributesStack.popLast()
	}

	/// Collects text found inside XHTML content; ignored everywhere else.
	public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {

		guard parsingXHTML else {
			return
		}
		guard var s = String(xmlPointer: xmlCharactersFound, count: count) else {
			return
		}

		// libxml decodes all entities; we need to re-encode certain characters
		// (<, >, and &) when inside XHTML text content.
		// The ampersand goes first so the entities produced for < and > are not
		// themselves re-encoded.
		s = s.replacingOccurrences(of: "&", with: "&amp;")
		s = s.replacingOccurrences(of: "<", with: "&lt;")
		s = s.replacingOccurrences(of: ">", with: "&gt;")

		// Append rather than assign: the tags and text already collected for
		// this XHTML block must be kept.
		xhtmlString = (xhtmlString ?? "") + s
	}
}
|
@ -0,0 +1,111 @@
|
||||
//
|
||||
// RSSArticle.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
|
||||
/// Mutable article model filled in while a feed is being parsed.
final class RSSArticle {

	var feedURL: String

	/// An RSS guid, if present, or calculated from other attributes.
	/// Should be unique to the feed, but not necessarily unique
	/// across different feeds. (Not suitable for a database ID.)
	///
	/// Computed once, on first access, from the properties as they are then.
	lazy var articleID: String = {
		return guid ?? calculatedArticleID()
	}()

	var guid: String?
	var title: String?
	var body: String?
	var link: String?
	var permalink: String?
	var authors: [RSSAuthor]?
	var enclosures: [RSSEnclosure]?
	var datePublished: Date?
	var dateModified: Date?
	var dateParsed: Date
	var language: String?

	/// Creates an empty article for `feedURL`; `dateParsed` is set to the current date.
	init(_ feedURL: String) {
		self.dateParsed = Date()
		self.feedURL = feedURL
	}

	/// Appends `enclosure`, creating the `enclosures` array on first use.
	func addEnclosure(_ enclosure: RSSEnclosure) {

		var updatedEnclosures = enclosures ?? []
		updatedEnclosures.append(enclosure)
		enclosures = updatedEnclosures
	}

	/// Appends `author`, creating the `authors` array on first use.
	func addAuthor(_ author: RSSAuthor) {

		var updatedAuthors = authors ?? []
		updatedAuthors.append(author)
		authors = updatedAuthors
	}
}
|
||||
|
||||
private extension RSSArticle {

	/// Builds an article ID for an article that has no guid, by hashing a
	/// combination of its other properties.
	///
	/// In general, feeds should have guids. When they don't, re-runs are very
	/// likely, because there's no other 100% reliable way to determine identity.
	/// The result is intended to be unique inside a feed, but not globally
	/// unique — not suitable for a database ID.
	func calculatedArticleID() -> String {

		let timestamp = datePublished.map { String(format: "%.0f", $0.timeIntervalSince1970) }

		// Preferred identifying string: the first non-empty of permalink, link, title.
		let identifier = [permalink, link, title].compactMap { $0 }.first { !$0.isEmpty }

		let seed: String
		if let timestamp {
			// Ideally we have a permalink and a pubDate.
			// Either one would probably be a good guid, but together they should be rock-solid.
			// (In theory. Feeds are buggy, though.)
			// With nothing else to go on, the timestamp alone is used.
			seed = (identifier ?? "") + timestamp
		} else if let identifier {
			seed = identifier
		} else if let body, !body.isEmpty {
			seed = body
		} else {
			seed = ""
		}

		return seed.md5String
	}
}
|
@ -0,0 +1,40 @@
|
||||
//
|
||||
// RSSAuthor.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Mutable author model filled in while a feed is being parsed.
final class RSSAuthor {

	var name: String?
	var url: String?
	var avatarURL: String?
	var emailAddress: String?

	init(name: String? = nil, url: String? = nil, avatarURL: String? = nil, emailAddress: String? = nil) {
		self.emailAddress = emailAddress
		self.avatarURL = avatarURL
		self.url = url
		self.name = name
	}

	/// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.)
	///
	/// A string containing "@" is treated as an email address; otherwise one whose
	/// lowercased form starts with "http" is treated as a URL; anything else is a name.
	convenience init(singleString: String) {

		let isEmailAddress = singleString.contains("@")
		let isURL = !isEmailAddress && singleString.lowercased().hasPrefix("http")
		let isName = !isEmailAddress && !isURL

		self.init(
			name: isName ? singleString : nil,
			url: isURL ? singleString : nil,
			emailAddress: isEmailAddress ? singleString : nil
		)
	}

	/// True when none of the four properties has a value.
	func isEmpty() -> Bool {

		return [name, url, avatarURL, emailAddress].allSatisfy { $0 == nil }
	}
}
|
@ -0,0 +1,20 @@
|
||||
//
|
||||
// RSSEnclosure.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Mutable enclosure model filled in while a feed is being parsed.
/// Only `url` is required; the other properties start out `nil`.
final class RSSEnclosure {

	var url: String
	var length: Int? = nil
	var mimeType: String? = nil
	var title: String? = nil

	init(url: String) {
		self.url = url
	}
}
|
@ -0,0 +1,22 @@
|
||||
//
|
||||
// RSSFeed.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Mutable feed model filled in by a parser.
/// Only `urlString` is required; the other properties start out `nil`.
final class RSSFeed {

	var urlString: String
	var title: String? = nil
	var link: String? = nil
	var language: String? = nil

	var articles: [RSSArticle]? = nil

	init(urlString: String) {
		self.urlString = urlString
	}
}
|
@ -0,0 +1,75 @@
|
||||
//
|
||||
// RSSFeedTransformer.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Converts the parser-internal feed model into the public one.
struct RSSFeedTransformer {

	/// Turn an internal RSSFeed into a public ParsedFeed.
	///
	/// Title, link, URL, language, and articles are carried over. The remaining
	/// `ParsedFeed` fields are passed as `nil`, and `expired` as `false`.
	static func parsedFeed(with feed: RSSFeed, feedType: FeedType) -> ParsedFeed {

		return ParsedFeed(
			type: feedType,
			title: feed.title,
			homePageURL: feed.link,
			feedURL: feed.urlString,
			language: feed.language,
			feedDescription: nil,
			nextURL: nil,
			iconURL: nil,
			faviconURL: nil,
			authors: nil,
			expired: false,
			hubs: nil,
			items: parsedItems(feed.articles)
		)
	}
}
|
||||
|
||||
private extension RSSFeedTransformer {

    /// Converts the feed's articles to a set of `ParsedItem`; a nil array yields an empty set.
    static func parsedItems(_ articles: [RSSArticle]?) -> Set<ParsedItem> {
        Set((articles ?? []).map(parsedItem))
    }

    /// Converts one article to a `ParsedItem`.
    ///
    /// The article's permalink becomes `url` and its link becomes `externalURL`.
    static func parsedItem(_ article: RSSArticle) -> ParsedItem {
        ParsedItem(
            syncServiceID: nil,
            uniqueID: article.articleID,
            feedURL: article.feedURL,
            url: article.permalink,
            externalURL: article.link,
            title: article.title,
            language: article.language,
            contentHTML: article.body,
            contentText: nil,
            summary: nil,
            imageURL: nil,
            bannerImageURL: nil,
            datePublished: article.datePublished,
            dateModified: article.dateModified,
            authors: parsedAuthors(article.authors),
            tags: nil,
            attachments: parsedAttachments(article.enclosures)
        )
    }

    /// Converts authors to `ParsedAuthor`; returns nil rather than an empty set.
    static func parsedAuthors(_ authors: [RSSAuthor]?) -> Set<ParsedAuthor>? {
        guard let authors, !authors.isEmpty else {
            return nil
        }

        let converted: [ParsedAuthor] = authors.compactMap { author in
            ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
        }

        guard !converted.isEmpty else {
            return nil
        }
        return Set(converted)
    }

    /// Converts enclosures to `ParsedAttachment`; returns nil rather than an empty set.
    static func parsedAttachments(_ enclosures: [RSSEnclosure]?) -> Set<ParsedAttachment>? {
        guard let enclosures, !enclosures.isEmpty else {
            return nil
        }

        let converted: [ParsedAttachment] = enclosures.compactMap { enclosure in
            // A missing, zero or negative length is reported as "size unknown".
            let sizeInBytes: Int? = enclosure.length.flatMap { $0 > 0 ? $0 : nil }
            return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
        }

        guard !converted.isEmpty else {
            return nil
        }
        return Set(converted)
    }
}
|
@ -0,0 +1,366 @@
|
||||
//
|
||||
// RSSParser.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
|
||||
/// SAX-driven RSS parser that fills in an internal `RSSFeed`.
public final class RSSParser {

    // Input and output.
    private let feedURL: String
    private let data: Data
    private let feed: RSSFeed

    // Articles in the order they were added; the most recent one is the one being filled in.
    private var articles = [RSSArticle]()
    private var currentArticle: RSSArticle? {
        articles.last
    }

    // Parse state, updated by the SAX callbacks.
    private var endRSSFound = false
    private var isRDF = false
    private var parsingArticle = false
    private var parsingChannelImage = false
    private var parsingAuthor = false
    private var currentAttributes: StringDictionary?

    /// Parses `data` and returns the resulting feed.
    ///
    /// - Parameters:
    ///   - urlString: The feed's URL string; stored on the feed and handed to each article.
    ///   - data: The feed's raw bytes.
    static func parsedFeed(urlString: String, data: Data) -> RSSFeed {
        let rssParser = RSSParser(urlString: urlString, data: data)
        rssParser.parse()
        return rssParser.feed
    }

    init(urlString: String, data: Data) {
        feedURL = urlString
        self.data = data
        feed = RSSFeed(urlString: urlString)
    }
}
|
||||
|
||||
private extension RSSParser {
|
||||
|
||||
/// Runs the SAX parser over `data` with `self` as delegate, then hands the
/// collected articles to the feed.
func parse() {
    SAXParser(delegate: self, data: data).parse()
    feed.articles = articles
}
|
||||
|
||||
/// Element names and namespace prefixes the parser compares against,
/// each stored as a UTF-8 C string for use with `SAXEqualTags`.
private struct XMLName {

    // Root elements.
    static let rss = "rss".utf8CString
    static let uppercaseRDF = "RDF".utf8CString

    // Un-prefixed elements.
    static let item = "item".utf8CString
    static let image = "image".utf8CString
    static let title = "title".utf8CString
    static let link = "link".utf8CString
    static let language = "language".utf8CString
    static let description = "description".utf8CString
    static let guid = "guid".utf8CString
    static let author = "author".utf8CString
    static let pubDate = "pubDate".utf8CString
    static let enclosure = "enclosure".utf8CString

    // Namespace prefixes and the local names looked up under them.
    static let dc = "dc".utf8CString
    static let creator = "creator".utf8CString
    static let date = "date".utf8CString
    static let content = "content".utf8CString
    static let encoded = "encoded".utf8CString
}
|
||||
|
||||
/// Stores a feed-level value (link, title or language) from the element that just ended.
///
/// Prefixed elements are ignored. The link is only set once; title and language
/// are overwritten each time a matching element ends.
func addFeedElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {

    if prefix != nil {
        return
    }

    if SAXEqualTags(localName, XMLName.link) {
        // First link wins.
        guard feed.link == nil else {
            return
        }
        feed.link = saxParser.currentString
        return
    }

    if SAXEqualTags(localName, XMLName.title) {
        feed.title = saxParser.currentString
        return
    }

    if SAXEqualTags(localName, XMLName.language) {
        feed.language = saxParser.currentString
    }
}
|
||||
|
||||
/// Appends a fresh article for this feed; it becomes `currentArticle`.
func addArticle() {
    articles.append(RSSArticle(feedURL))
}
|
||||
|
||||
/// Stores the value of an item-level element that just ended on the current article.
///
/// Handles `dc:` elements (delegated to `addDCElement`), `content:encoded`, and the
/// un-prefixed `guid`, `author`, `link`, `description`, `title`, `pubDate` and
/// `enclosure` elements. Any other prefixed element is ignored.
///
/// `pubDate` and `enclosure` are checked before the text-dependent elements.
/// Previously they were only reachable when `saxParser.currentString` was nil,
/// so a `<pubDate>` that actually contained text was never parsed, and an
/// enclosure with any text content was dropped.
func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {

    guard let currentArticle else {
        return
    }

    if let prefix, SAXEqualTags(prefix, XMLName.dc) {
        addDCElement(saxParser, localName, currentArticle)
        return
    }

    if let prefix, SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) {
        // content:encoded always wins over description, but only when non-empty.
        if let currentString = saxParser.currentString, !currentString.isEmpty {
            currentArticle.body = currentString
        }
        return
    }

    guard prefix == nil else {
        return
    }

    // The date is parsed from the raw characters, not from currentString.
    if SAXEqualTags(localName, XMLName.pubDate) {
        // Only assign a successfully parsed date, so an empty or unparseable
        // pubDate cannot erase a date that was set earlier (e.g. by dc:date).
        if let date = currentDate(saxParser) {
            currentArticle.datePublished = date
        }
        return
    }

    // An enclosure carries its data in attributes; its text content is irrelevant.
    if SAXEqualTags(localName, XMLName.enclosure) {
        if let currentAttributes {
            addEnclosure(currentAttributes, currentArticle)
        }
        return
    }

    // Everything below needs the element's text.
    guard let currentString = saxParser.currentString else {
        return
    }

    if SAXEqualTags(localName, XMLName.guid) {
        addGuid(currentString, currentArticle)
    }
    else if SAXEqualTags(localName, XMLName.author) {
        addAuthorWithString(currentString, currentArticle)
    }
    else if SAXEqualTags(localName, XMLName.link) {
        currentArticle.link = urlString(currentString)
    }
    else if SAXEqualTags(localName, XMLName.description) {
        // Don't overwrite a body that was already set.
        if currentArticle.body == nil {
            currentArticle.body = currentString
        }
    }
    else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) {
        currentArticle.title = currentString
    }
}
|
||||
|
||||
/// Handles an element with the `dc` prefix: `creator` adds an author,
/// `date` sets the published date. Other local names are ignored.
func addDCElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ currentArticle: RSSArticle) {

    if SAXEqualTags(localName, XMLName.creator) {
        guard let creator = saxParser.currentString else {
            return
        }
        addAuthorWithString(creator, currentArticle)
        return
    }

    if SAXEqualTags(localName, XMLName.date) {
        currentArticle.datePublished = currentDate(saxParser)
    }
}
|
||||
|
||||
static let isPermalinkKey = "isPermaLink"
|
||||
static let isPermalinkLowercaseKey = "ispermalink"
|
||||
static let falseValue = "false"
|
||||
|
||||
/// Records the item's guid and, when appropriate, also uses it as the permalink.
///
/// The guid is always stored. It is additionally resolved into `permalink` unless
/// the element has an `isPermaLink` attribute equal to `false`, or the guid does not
/// look like a URL or relative path. Both the attribute name and its value are
/// matched case-insensitively, so `isPermaLink="False"` is honored too.
func addGuid(_ guid: String, _ currentArticle: RSSArticle) {

    currentArticle.guid = guid

    guard let currentAttributes else {
        return
    }

    // Prefer the spec's exact spelling; otherwise allow `ispermalink`, `isPermalink`, etc.
    let isPermaLinkValue = currentAttributes[Self.isPermalinkKey]
        ?? currentAttributes.first(where: { $0.key.lowercased() == Self.isPermalinkLowercaseKey })?.value

    // Spec: `isPermaLink is optional, its default value is true.`
    // https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
    // Stop only if the value is present and equal to false — otherwise it's a permalink.
    if let isPermaLinkValue, isPermaLinkValue.lowercased() == Self.falseValue {
        return
    }

    // Feed bug found in the wild: using a guid that's not really a permalink
    // and not realizing that `isPermaLink` is true by default.
    if stringIsProbablyAURLOrRelativePath(guid) {
        currentArticle.permalink = urlString(guid)
    }
}
|
||||
|
||||
/// Heuristic: does `s` look like a URL or a relative path?
///
/// An RSS guid is a permalink by default, unless it is written as
/// `<guid isPermaLink="false">some-identifier</guid>`. Feed authors often assume
/// the opposite default, so this tries to spot values that cannot be a URL
/// or even a relative path. This may need to evolve over time.
///
/// - Returns: `true` only when `s` contains a `/` and does not start with `tag:`
///   (case-insensitively).
func stringIsProbablyAURLOrRelativePath(_ s: String) -> Bool {

    // No slash is about the best signal available; bad guids are often just integers.
    let containsSlash = s.contains("/")

    // A common non-URL guid form starts with `tag:`.
    let isTagIdentifier = s.lowercased().hasPrefix("tag:")

    return containsSlash && !isTagIdentifier
}
|
||||
|
||||
/// Makes a best attempt at turning a string into a URL string.
///
/// A string that already starts with `http` (case-insensitively) is returned as is.
/// Anything else is treated as a relative URL and resolved against the feed's
/// home page link if there is one, otherwise against the feed URL.
///
/// The result is not guaranteed to be a valid URL string: when the base or the
/// resolved URL cannot be created, `s` is returned unchanged.
func urlString(_ s: String) -> String {

    guard !s.lowercased().hasPrefix("http") else {
        return s
    }

    let base = feed.link ?? feedURL

    if let baseURL = URL(string: base), let resolved = URL(string: s, relativeTo: baseURL) {
        return resolved.absoluteString
    }
    return s
}
|
||||
|
||||
/// Adds an author built from a single free-form string; empty strings are ignored.
func addAuthorWithString(_ authorString: String, _ currentArticle: RSSArticle) {

    guard !authorString.isEmpty else {
        return
    }
    currentArticle.addAuthor(RSSAuthor(singleString: authorString))
}
|
||||
|
||||
/// Attribute names read from an `enclosure` element.
private struct EnclosureKey {
    static let url = "url"
    static let type = "type"
    static let length = "length"
}
|
||||
|
||||
/// Builds an enclosure from element attributes and adds it to the article.
///
/// Nothing is added when the `url` attribute is missing or empty. The length is
/// set only when the `length` attribute parses as an integer.
func addEnclosure(_ attributes: StringDictionary, _ currentArticle: RSSArticle) {

    guard let enclosureURL = attributes[EnclosureKey.url], !enclosureURL.isEmpty else {
        return
    }

    let newEnclosure = RSSEnclosure(url: enclosureURL)
    newEnclosure.mimeType = attributes[EnclosureKey.type]
    // A fresh enclosure starts with a nil length, so assigning nil here changes nothing.
    newEnclosure.length = attributes[EnclosureKey.length].flatMap { Int($0) }

    currentArticle.addEnclosure(newEnclosure)
}
|
||||
|
||||
/// Parses the parser's current characters as a date.
///
/// - Returns: nil when there are no current characters; otherwise whatever
///   `DateParser.date(data:)` returns for them.
func currentDate(_ saxParser: SAXParser) -> Date? {

    if let characters = saxParser.currentCharacters {
        return DateParser.date(data: characters)
    }
    return nil
}
|
||||
}
|
||||
|
||||
extension RSSParser: SAXParserDelegate {
|
||||
|
||||
static let rdfAbout = "rdf:about"
|
||||
|
||||
/// SAX callback for the start of an element.
///
/// Updates the parse-state flags, captures attributes for the elements that need
/// them, starts a new article on un-prefixed `item`, and begins storing characters
/// unless the parser is inside the channel image.
public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {

    guard !endRSSFound else {
        return
    }

    if SAXEqualTags(localName, XMLName.uppercaseRDF) {
        isRDF = true
        return
    }

    // Attributes are only needed for RDF items, guids and enclosures.
    let needsAttributes = (isRDF && SAXEqualTags(localName, XMLName.item))
        || SAXEqualTags(localName, XMLName.guid)
        || SAXEqualTags(localName, XMLName.enclosure)
    let xmlAttributes: StringDictionary? = needsAttributes
        ? saxParser.attributesDictionary(attributes, attributeCount: attributeCount)
        : nil

    // Every other element resets the stored attributes to nil.
    currentAttributes = xmlAttributes

    if prefix == nil {
        if SAXEqualTags(localName, XMLName.item) {
            addArticle()
            parsingArticle = true

            // RSS 1.0 guid: the item's rdf:about serves as both guid and permalink.
            if isRDF, let rdfGuid = xmlAttributes?[Self.rdfAbout], let currentArticle {
                currentArticle.guid = rdfGuid
                currentArticle.permalink = rdfGuid
            }
        }
        else if SAXEqualTags(localName, XMLName.image) {
            parsingChannelImage = true
        }
        else if SAXEqualTags(localName, XMLName.author), parsingArticle {
            parsingAuthor = true
        }
    }

    if !parsingChannelImage {
        saxParser.beginStoringCharacters()
    }
}
|
||||
|
||||
/// SAX callback for the end of an element.
///
/// Closing the root (`rss`, or `RDF` when parsing RDF) stops all further processing.
/// Otherwise the element either clears a state flag or is handed to the article or
/// feed element handler, depending on where the parser currently is.
public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {

    guard !endRSSFound else {
        return
    }

    let isClosingRoot = (isRDF && SAXEqualTags(localName, XMLName.uppercaseRDF))
        || SAXEqualTags(localName, XMLName.rss)
    if isClosingRoot {
        endRSSFound = true
        return
    }

    if SAXEqualTags(localName, XMLName.image) {
        parsingChannelImage = false
        return
    }

    if SAXEqualTags(localName, XMLName.item) {
        parsingArticle = false
        return
    }

    if parsingArticle {
        addArticleElement(saxParser, localName, prefix)
        // Clear the flag only after the element was handled: the handler reads it.
        if SAXEqualTags(localName, XMLName.author) {
            parsingAuthor = false
        }
        return
    }

    if !parsingChannelImage {
        addFeedElement(saxParser, localName, prefix)
    }
}
|
||||
|
||||
/// SAX callback for character data. Required by the delegate protocol;
/// this parser deliberately does nothing here.
public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {
    // Intentionally empty.
}
|
||||
}
|
||||
|
@ -1,76 +0,0 @@
|
||||
//
|
||||
// FeedParser.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import ParserObjC
|
||||
|
||||
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
|
||||
// You don’t need to know the type of feed.
|
||||
|
||||
public struct FeedParser {

    /// Returns `true` when the data is detected as one of the supported feed formats.
    public static func canParse(_ parserData: ParserData) -> Bool {

        // Exhaustive on purpose: a new FeedType case should be a compile error here.
        switch feedType(parserData) {
        case .jsonFeed, .rssInJSON, .rss, .atom:
            return true
        case .unknown, .notAFeed:
            return false
        }
    }

    /// Detects the feed type and parses the data with the matching parser.
    ///
    /// - Returns: The parsed feed, or nil when the type is unknown or not a feed.
    /// - Throws: Whatever the JSON Feed or RSS-in-JSON parser throws.
    public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? {

        // Same work as the synchronous variant; keep a single implementation.
        try parseSync(parserData)
    }

    /// For unit tests measuring performance.
    ///
    /// - Returns: The parsed feed, or nil when the type is unknown or not a feed.
    /// - Throws: Whatever the JSON Feed or RSS-in-JSON parser throws.
    public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? {

        switch feedType(parserData) {

        case .jsonFeed:
            return try JSONFeedParser.parse(parserData)

        case .rssInJSON:
            return try RSSInJSONParser.parse(parserData)

        case .rss:
            return RSSParser.parse(parserData)

        case .atom:
            return AtomParser.parse(parserData)

        case .unknown, .notAFeed:
            return nil
        }
    }
}
|
@ -1,64 +0,0 @@
|
||||
//
|
||||
// FeedType.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
#if SWIFT_PACKAGE
|
||||
import ParserObjC
|
||||
#endif
|
||||
|
||||
/// The kind of feed detected in a chunk of data.
public enum FeedType: Sendable {

    // Recognized feed formats.
    case rss, atom, jsonFeed, rssInJSON

    /// Not enough information to decide yet.
    case unknown

    /// Definitely not a feed.
    case notAFeed
}
|
||||
|
||||
|
||||
private let minNumberOfBytesRequired = 128
|
||||
|
||||
/// Detects the feed type of `parserData`.
///
/// Can be called with partial data — while still downloading, for instance.
/// Returns `.unknown` when there is not enough data (ask again when there is more)
/// and `.notAFeed` when the data is definitely not a feed.
/// This is fast enough to call on the main thread.
public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType {

    guard parserData.data.count >= minNumberOfBytesRequired else {
        return .unknown
    }

    let nsdata = parserData.data as NSData

    // The order of these checks matters; the first match wins.
    if nsdata.isProbablyJSONFeed() { return .jsonFeed }
    if nsdata.isProbablyRSSInJSON() { return .rssInJSON }
    if nsdata.isProbablyRSS() { return .rss }
    if nsdata.isProbablyAtom() { return .atom }

    // A JSON Feed might not be detectable without all the data.
    // Dr. Drang's JSON Feed (see allthis.json and allthis-partial.json in tests)
    // has, at this writing, the JSON version element at the end of the feed,
    // which is totally legal — but it means the feed cannot be recognized
    // until everything has arrived. So partial JSON is .unknown, not .notAFeed.
    let mightBeIncompleteJSONFeed = isPartialData && nsdata.isProbablyJSON()
    return mightBeIncompleteJSONFeed ? .unknown : .notAFeed
}
|
@ -1,15 +0,0 @@
|
||||
//
|
||||
// ParsedHub.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A hub entry attached to a parsed feed: a type string paired with a URL string.
public struct ParsedHub: Hashable, Sendable {

    /// The hub's type, as a string.
    public let type: String

    /// The hub's URL, as a string.
    public let url: String
}
|
@ -1,32 +0,0 @@
|
||||
//
|
||||
// AtomParser.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
#if SWIFT_PACKAGE
|
||||
import ParserObjC
|
||||
#endif
|
||||
|
||||
// AtomParser wraps the Objective-C RSAtomParser.
|
||||
//
|
||||
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
|
||||
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
|
||||
// the same things that JSONFeedParser and RSSInJSONParser create.
|
||||
//
|
||||
// In general, you should see FeedParser.swift for all your feed-parsing needs.
|
||||
|
||||
public struct AtomParser {

    /// Parses Atom data with the Objective-C `RSAtomParser` and converts the
    /// result to a `ParsedFeed`.
    ///
    /// - Returns: nil when the Objective-C parser produces no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {

        guard let rsParsedFeed = RSAtomParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(rsParsedFeed)
    }
}
|
@ -1,80 +0,0 @@
|
||||
//
|
||||
// RSParsedFeedTransformer.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
#if SWIFT_PACKAGE
|
||||
import ParserObjC
|
||||
#endif
|
||||
|
||||
// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
|
||||
// They create an RSParsedFeed object and related Objective-C objects.
|
||||
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
|
||||
// which is part of providing a single API for feed parsing.
|
||||
|
||||
struct RSParsedFeedTransformer {

    /// Converts an Objective-C `RSParsedFeed` into a Swift `ParsedFeed`.
    ///
    /// The resulting feed's type is always `.rss`. Only title, home page URL,
    /// feed URL, language and items are carried over.
    static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {
        ParsedFeed(
            type: .rss,
            title: rsParsedFeed.title,
            homePageURL: rsParsedFeed.link,
            feedURL: rsParsedFeed.urlString,
            language: rsParsedFeed.language,
            feedDescription: nil,
            nextURL: nil,
            iconURL: nil,
            faviconURL: nil,
            authors: nil,
            expired: false,
            hubs: nil,
            items: parsedItems(rsParsedFeed.articles)
        )
    }
}
|
||||
|
||||
private extension RSParsedFeedTransformer {

    /// Creates a `Set<ParsedItem>` from a `Set<RSParsedArticle>`.
    static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> Set<ParsedItem> {
        Set(parsedArticles.map(parsedItem))
    }

    /// Converts one Objective-C article to a `ParsedItem`.
    ///
    /// The article's permalink becomes `url` and its link becomes `externalURL`.
    static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {
        ParsedItem(
            syncServiceID: nil,
            uniqueID: parsedArticle.articleID,
            feedURL: parsedArticle.feedURL,
            url: parsedArticle.permalink,
            externalURL: parsedArticle.link,
            title: parsedArticle.title,
            language: parsedArticle.language,
            contentHTML: parsedArticle.body,
            contentText: nil,
            summary: nil,
            imageURL: nil,
            bannerImageURL: nil,
            datePublished: parsedArticle.datePublished,
            dateModified: parsedArticle.dateModified,
            authors: parsedAuthors(parsedArticle.authors),
            tags: nil,
            attachments: parsedAttachments(parsedArticle.enclosures)
        )
    }

    /// Converts authors to `ParsedAuthor`; returns nil rather than an empty set.
    static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {
        guard let authors = authors, !authors.isEmpty else {
            return nil
        }

        let converted: [ParsedAuthor] = authors.compactMap { author in
            ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
        }

        if converted.isEmpty {
            return nil
        }
        return Set(converted)
    }

    /// Converts enclosures to `ParsedAttachment`; returns nil rather than an empty set.
    static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {
        guard let enclosures = enclosures, !enclosures.isEmpty else {
            return nil
        }

        let converted: [ParsedAttachment] = enclosures.compactMap { enclosure in
            // A zero or negative length is reported as "size unknown".
            let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil
            return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
        }

        if converted.isEmpty {
            return nil
        }
        return Set(converted)
    }
}
|
@ -1,29 +0,0 @@
|
||||
//
|
||||
// RSSParser.swift
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import ParserObjC
|
||||
|
||||
// RSSParser wraps the Objective-C RSRSSParser.
|
||||
//
|
||||
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
|
||||
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
|
||||
// the same things that JSONFeedParser and RSSInJSONParser create.
|
||||
//
|
||||
// In general, you should see FeedParser.swift for all your feed-parsing needs.
|
||||
|
||||
public struct RSSParser {

    /// Parses RSS data with the Objective-C `RSRSSParser` and converts the
    /// result to a `ParsedFeed`.
    ///
    /// - Returns: nil when the Objective-C parser produces no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {

        guard let rsParsedFeed = RSRSSParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(rsParsedFeed)
    }
}
|
349
Modules/Parser/Sources/Parser/HTMLParser/HTMLEntityDecoder.swift
Normal file
349
Modules/Parser/Sources/Parser/HTMLParser/HTMLEntityDecoder.swift
Normal file
@ -0,0 +1,349 @@
|
||||
//
|
||||
// HTMLEntityDecoder.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 9/14/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public final class HTMLEntityDecoder {

    /// Returns `encodedString` with HTML entities replaced by the characters they stand for.
    ///
    /// An ampersand that does not start a decodable entity is kept literally, along
    /// with everything after it. When no entity is decoded at all, the original
    /// string is returned unchanged.
    public static func decodedString(_ encodedString: String) -> String {

        let scanner = EntityScanner(string: encodedString)
        var result = ""
        var didDecodeAtLeastOneEntity = false

        while !scanner.isAtEnd {

            // Consumes text up to and including the next ampersand (if there is one).
            result.append(scanner.scanUpToAmpersand())

            if scanner.isAtEnd {
                // The scan advanced at least one character, so the last character
                // consumed is the string's last character. If that is an ampersand,
                // the scan swallowed it: put it back.
                if encodedString.last == "&" {
                    result.append("&")
                }
                break
            }

            // Not at the end, so the scan stopped on an ampersand;
            // the scan location is now just past it.
            let locationAfterAmpersand = scanner.scanLocation

            if let decodedEntity = scanner.scanEntityValue() {
                result.append(decodedEntity)
                didDecodeAtLeastOneEntity = true
            }
            else {
                // Not an entity: keep the ampersand and resume right after it.
                // (Resuming one character later would drop the character
                // following the ampersand, e.g. the second T in "AT&T".)
                result.append("&")
                scanner.scanLocation = locationAfterAmpersand
            }
        }

        return didDecodeAtLeastOneEntity ? result : encodedString
    }
}
|
||||
|
||||
/// Purpose-built version of NSScanner, which has deprecated the parts we want to use.
///
/// Positions are counted in `Character`s. The string is copied into an array once,
/// so each character lookup is constant time rather than a walk from the start
/// of the string.
final class EntityScanner {

    let string: String
    let count: Int
    var scanLocation = 0

    /// The string's characters, indexed by `scanLocation`.
    private let characters: [Character]

    var isAtEnd: Bool {
        scanLocation >= count
    }

    /// The character at `scanLocation`, or nil when the location is outside the string.
    var currentCharacter: Character? {
        guard scanLocation >= 0, scanLocation < count else {
            return nil
        }
        return characters[scanLocation]
    }

    init(string: String) {
        let characters = Array(string)
        self.string = string
        self.characters = characters
        self.count = characters.count
    }

    static let ampersandCharacter = Character("&")

    /// Scans forward through the next ampersand.
    ///
    /// The ampersand itself is consumed but not included in the result.
    /// - Returns: the characters before the ampersand (or up to the end of the
    ///   string when there is none). May be an empty string.
    func scanUpToAmpersand() -> String {

        var scanned = ""

        while let ch = currentCharacter {
            scanLocation += 1
            if ch == Self.ampersandCharacter {
                break
            }
            scanned.append(ch)
        }

        return scanned
    }

    static let semicolonCharacter = Character(";")

    /// Scans an entity body that starts at `scanLocation` (just past the ampersand).
    ///
    /// - Returns: the decoded entity, with the scan location moved past the
    ///   semicolon. Returns nil when whitespace, the end of the string, or more than
    ///   20 characters come before a semicolon, or when the text before the
    ///   semicolon is not a known entity. On nil the scan location is not restored;
    ///   callers reposition the scanner themselves.
    func scanEntityValue() -> String? {

        let initialScanLocation = scanLocation
        let maxEntityLength = 20 // It's probably smaller, but this is just for sanity.

        while let ch = currentCharacter {

            if let scalar = ch.unicodeScalars.first, CharacterSet.whitespacesAndNewlines.contains(scalar) {
                break
            }

            if ch == Self.semicolonCharacter {
                let entity = String(characters[initialScanLocation..<scanLocation])
                // Unknown entities are ordinary input (feeds contain all sorts of
                // things), so this is a plain nil rather than an assertion failure.
                guard let decoded = decodedEntity(entity) else {
                    scanLocation = initialScanLocation + 1
                    return nil
                }
                scanLocation += 1
                return decoded
            }

            scanLocation += 1
            if scanLocation - initialScanLocation > maxEntityLength {
                break
            }
        }

        return nil
    }
}
|
||||
|
||||
extension String {

    /// Returns the index `i` characters from the start.
    ///
    /// - Returns: nil when `i` is negative or greater than the character count.
    ///   An `i` equal to the count yields `endIndex`, which is valid as the upper
    ///   bound of a range but not as a subscript.
    func indexForInt(_ i: Int) -> Index? {

        // A negative offset would walk backward from startIndex and trap.
        guard i >= 0 else {
            return nil
        }
        return index(startIndex, offsetBy: i, limitedBy: endIndex)
    }

    /// Returns the character at position `i`, or nil when `i` is out of bounds.
    func characterAtIntIndex(_ i: Int) -> Character? {

        // endIndex is a valid index but not a valid subscript position.
        guard let index = indexForInt(i), index < endIndex else {
            return nil
        }
        return self[index]
    }

    /// Returns the substring covering `intRange` (character offsets),
    /// or nil when either bound is out of bounds.
    func substring(intRange: Range<Int>) -> String? {

        guard let rangeLower = indexForInt(intRange.lowerBound),
              let rangeUpper = indexForInt(intRange.upperBound) else {
            return nil
        }
        return String(self[rangeLower..<rangeUpper])
    }
}
|
||||
|
||||
/// Decodes a single HTML entity.
///
/// `rawEntity` may or may not have leading `&` and/or trailing `;` characters.
/// Named entities are looked up in `entitiesDictionary`; `#x…`/`#X…` is read as a
/// hexadecimal character reference and `#…` as a decimal one.
///
/// - Returns: the decoded string, or nil when the entity is unknown, is not a
///   number, is zero, or does not fit in 32 bits.
private func decodedEntity(_ rawEntity: String) -> String? {

    var s = rawEntity

    if s.hasPrefix("&") {
        s.removeFirst()
    }
    if s.hasSuffix(";") {
        s.removeLast()
    }

    if let namedEntity = entitiesDictionary[s] {
        return namedEntity
    }

    if s.hasPrefix("#x") || s.hasPrefix("#X") { // Hex
        let scanner = Scanner(string: s)
        scanner.charactersToBeSkipped = CharacterSet(charactersIn: "#xX")
        var hexValue: UInt64 = 0
        // `UInt32(exactly:)` rejects values above UInt32.max instead of trapping
        // on them; zero is rejected just as it is for decimal references.
        guard scanner.scanHexInt64(&hexValue), let value = UInt32(exactly: hexValue), value >= 1 else {
            return nil
        }
        return stringWithValue(value)
    }

    if s.hasPrefix("#") { // Decimal
        s.removeFirst()
        guard let value = UInt32(s), value >= 1 else {
            return nil
        }
        return stringWithValue(value)
    }

    return nil
}
|
||||
|
||||
/// Returns a one-character string for a numeric character reference.
///
/// Values 0x80 through 0x9F are first remapped through a Windows Latin-1
/// extension table (so, for example, 0x80 becomes the euro sign).
///
/// - Returns: nil when the resulting value is not a valid Unicode scalar
///   (a surrogate, or anything above 0x10FFFF).
private func stringWithValue(_ value: UInt32) -> String? {

    // From WebCore's HTMLEntityParser
    let windowsLatin1ExtensionArray: [UInt32] = [
        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
    ]

    var scalarValue = value

    if (0x80...0x9F).contains(scalarValue) { // value >= 128 && value < 160
        scalarValue = windowsLatin1ExtensionArray[Int(scalarValue - 0x80)]
    }

    // Unicode.Scalar validates the value directly; no byte-order swap or
    // Data/UTF-32 decoding round trip is needed.
    guard let scalar = Unicode.Scalar(scalarValue) else {
        return nil
    }
    return String(Character(scalar))
}
|
||||
|
||||
/// Named HTML entities (without the leading `&` and trailing `;`) mapped to the
/// text they decode to. Lookup is case-sensitive.
private let entitiesDictionary: [String: String] =
[
    "AElig": "Æ",
    "Aacute": "Á",
    "Acirc": "Â",
    "Agrave": "À",
    "Aring": "Å",
    "Atilde": "Ã",
    "Auml": "Ä",
    "Ccedil": "Ç",
    // NOTE(review): HTML5 defines Dstrok as U+0110 (Đ); this table maps it to
    // the same character as ETH. Left unchanged — confirm which is intended.
    "Dstrok": "Ð",
    "ETH": "Ð",
    "Eacute": "É",
    "Ecirc": "Ê",
    "Egrave": "È",
    "Euml": "Ë",
    "Iacute": "Í",
    "Icirc": "Î",
    "Igrave": "Ì",
    "Iuml": "Ï",
    "Ntilde": "Ñ",
    "Oacute": "Ó",
    "Ocirc": "Ô",
    "Ograve": "Ò",
    "Oslash": "Ø",
    "Otilde": "Õ",
    "Ouml": "Ö",
    "Pi": "Π",
    "THORN": "Þ",
    "Uacute": "Ú",
    "Ucirc": "Û",
    "Ugrave": "Ù",
    "Uuml": "Ü",
    "Yacute": "Ý", // U+00DD; previously a plain "Y"
    "aacute": "á",
    "acirc": "â",
    "acute": "´",
    "aelig": "æ",
    "agrave": "à",
    "amp": "&",
    "apos": "'",
    "aring": "å",
    "atilde": "ã",
    "auml": "ä",
    "brkbar": "¦",
    "brvbar": "¦",
    "ccedil": "ç",
    "cedil": "¸",
    "cent": "¢",
    "copy": "©",
    "curren": "¤",
    "deg": "°",
    "die": "¨",
    "divide": "÷",
    "eacute": "é",
    "ecirc": "ê",
    "egrave": "è",
    "eth": "ð",
    "euml": "ë",
    "euro": "€",
    "frac12": "½",
    "frac14": "¼",
    "frac34": "¾",
    "gt": ">",
    "hearts": "♥",
    "hellip": "…",
    "iacute": "í",
    "icirc": "î",
    "iexcl": "¡",
    "igrave": "ì",
    "iquest": "¿",
    "iuml": "ï",
    "laquo": "«",
    "ldquo": "“",
    "lsquo": "‘",
    "lt": "<",
    "macr": "¯",
    "mdash": "—",
    "micro": "µ",
    "middot": "·",
    "ndash": "–",
    "not": "¬",
    "ntilde": "ñ",
    "oacute": "ó",
    "ocirc": "ô",
    "ograve": "ò",
    "ordf": "ª",
    "ordm": "º",
    "oslash": "ø",
    "otilde": "õ",
    "ouml": "ö",
    "para": "¶",
    "pi": "π",
    "plusmn": "±",
    "pound": "£",
    "quot": "\"",
    "raquo": "»",
    "rdquo": "”",
    "reg": "®",
    "rsquo": "’",
    "sect": "§",
    "shy": "\u{00AD}", // soft hyphen (173)
    "sup1": "¹",
    "sup2": "²",
    "sup3": "³",
    "szlig": "ß",
    "thorn": "þ",
    "times": "×",
    "trade": "™",
    "uacute": "ú",
    "ucirc": "û",
    "ugrave": "ù",
    "uml": "¨",
    "uuml": "ü",
    "yacute": "ý", // U+00FD; previously a plain "y"
    "yen": "¥",
    "yuml": "ÿ",
    "infin": "∞",
    "nbsp": "\u{00A0}" // no-break space (160)
]
|
22
Modules/Parser/Sources/Parser/HTMLParser/HTMLLink.swift
Normal file
22
Modules/Parser/Sources/Parser/HTMLParser/HTMLLink.swift
Normal file
@ -0,0 +1,22 @@
|
||||
//
|
||||
// HTMLLink.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 9/21/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// One link collected from an HTML document.
///
/// Every property is optional and mutable; the initializer defaults each to nil.
public final class HTMLLink {

	/// Absolute URL string.
	public var urlString: String?

	/// Text associated with the link.
	public var text: String?

	/// Title attribute inside anchor tag.
	public var title: String?

	init(urlString: String? = nil, text: String? = nil, title: String? = nil) {
		self.title = title
		self.text = text
		self.urlString = urlString
	}
}
|
118
Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift
Normal file
118
Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift
Normal file
@ -0,0 +1,118 @@
|
||||
//
|
||||
// HTMLLinkParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 9/21/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
|
||||
/// Collects the `<a>` elements of an HTML document as `HTMLLink` objects.
public final class HTMLLinkParser {

	/// Links found so far, in the order the SAX parser reported them.
	public private(set) var links: [HTMLLink] = []

	private let parserData: ParserData

	/// `parserData.url` as a URL; nil when that string can't be turned into a URL.
	private let baseURL: URL?

	/// Parses `parserData` and returns the links that were found.
	public static func htmlLinks(with parserData: ParserData) -> [HTMLLink] {
		let linkParser = HTMLLinkParser(parserData)
		linkParser.parse()
		return linkParser.links
	}

	init(_ parserData: ParserData) {
		self.baseURL = URL(string: parserData.url)
		self.parserData = parserData
	}
}

private extension HTMLLinkParser {

	/// Runs a SAX HTML parse over the data with this object as the delegate.
	func parse() {
		SAXHTMLParser(delegate: self, data: parserData.data).parse()
	}
}
|
||||
|
||||
extension HTMLLinkParser: SAXHTMLParserDelegate {

	/// The link currently being filled in: the most recently appended one.
	private var currentLink: HTMLLink? {
		links.last
	}

	private struct HTMLAttributeName {
		static let href = "href"
		static let title = "title"
	}

	private struct HTMLName {
		static let a = "a".utf8CString
	}

	/// Returns the `title` attribute (case-insensitive key lookup), if any.
	private func title(with attributesDictionary: StringDictionary) -> String? {
		attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.title)
	}

	/// Returns the `href` attribute resolved against `baseURL`, or nil.
	///
	/// Returns nil when there is no non-empty href, when there is no base URL,
	/// or when the href can't be parsed as a URL. The href comes from
	/// arbitrary HTML, so an unparseable value is an expected input rather
	/// than a programmer error — it must not trigger an assertion.
	private func urlString(with attributesDictionary: StringDictionary) -> String? {

		guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href), !href.isEmpty else {
			return nil
		}
		guard let baseURL, let absoluteURL = URL(string: href, relativeTo: baseURL) else {
			return nil
		}

		return absoluteURL.absoluteString
	}

	/// Copies the URL and title from the attributes into `currentLink`.
	private func handleLinkAttributes(_ attributesDictionary: StringDictionary) {

		guard let currentLink else {
			assertionFailure("currentLink must not be nil")
			return
		}

		currentLink.urlString = urlString(with: attributesDictionary)
		currentLink.title = title(with: attributesDictionary)
	}

	/// On `<a>`: appends a new link, fills in its attributes, and asks the
	/// parser to begin storing characters (read back in `endElement`).
	public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer<XMLPointer?>?) {

		guard SAXEqualTags(name, HTMLName.a) else {
			return
		}

		// Append first: handleLinkAttributes works on currentLink, which is links.last.
		links.append(HTMLLink())

		if let attributesDictionary = saxHTMLParser.attributesDictionary(attributes) {
			handleLinkAttributes(attributesDictionary)
		}

		saxHTMLParser.beginStoringCharacters()
	}

	/// On `</a>`: stores the parser's current whitespace-trimmed string as the link text.
	public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) {

		guard SAXEqualTags(name, HTMLName.a) else {
			return
		}
		guard let currentLink else {
			assertionFailure("currentLink must not be nil.")
			return
		}

		currentLink.text = saxHTMLParser.currentStringWithTrimmedWhitespace
	}

	public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) {
		// Nothing needed.
	}
}
|
437
Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift
Normal file
437
Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift
Normal file
@ -0,0 +1,437 @@
|
||||
//
|
||||
// HTMLMetadata.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 9/22/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Metadata derived from the `<link>` and `<meta>` tags of an HTML page.
///
/// Collection-valued properties are nil (rather than empty) when nothing matched.
public final class HTMLMetadata {

	public let baseURLString: String
	public let tags: [HTMLTag]
	public let favicons: [HTMLMetadataFavicon]?
	public let appleTouchIcons: [HTMLMetadataAppleTouchIcon]?
	public let feedLinks: [HTMLMetadataFeedLink]?
	public let openGraphProperties: HTMLOpenGraphProperties?
	public let twitterProperties: HTMLTwitterProperties?

	/// - Parameters:
	///   - urlString: URL string of the page; used as the base for resolving links.
	///   - tags: The link and meta tags found in the page.
	init(_ urlString: String, _ tags: [HTMLTag]) {

		self.baseURLString = urlString
		self.tags = tags

		self.favicons = Self.resolvedFaviconLinks(urlString, tags)
		self.appleTouchIcons = Self.appleTouchIconTags(tags)?.map { HTMLMetadataAppleTouchIcon(urlString, $0) }
		self.feedLinks = Self.feedLinkTags(tags)?.map { HTMLMetadataFeedLink(urlString, $0) }

		self.openGraphProperties = HTMLOpenGraphProperties(urlString, tags)
		self.twitterProperties = HTMLTwitterProperties(urlString, tags)
	}

	/// Returns favicons for link tags whose rel contains the word "icon",
	/// dropping tags without a resolvable URL and duplicates of an
	/// already-seen URL (first occurrence wins). Nil when none remain.
	static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? {

		guard let iconLinkTags = linkTagsWithMatchingRel("icon", tags) else {
			return nil
		}

		// A Set gives constant-time duplicate detection.
		var seenURLStrings = Set<String>()

		let favicons: [HTMLMetadataFavicon] = iconLinkTags.compactMap { htmlTag -> HTMLMetadataFavicon? in
			let favicon = HTMLMetadataFavicon(baseURLString, htmlTag)
			guard let faviconURLString = favicon.urlString, seenURLStrings.insert(faviconURLString).inserted else {
				return nil
			}
			return favicon
		}

		return favicons.isEmpty ? nil : favicons
	}

	/// Returns link tags whose rel contains "apple-touch-icon" or
	/// "apple-touch-icon-precomposed". Nil when there are none.
	static func appleTouchIconTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

		guard let allLinkTags = linkTags(tags) else {
			return nil
		}
		return tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], allLinkTags)
	}

	/// Returns rel="alternate" link tags that have a feed MIME type and a
	/// non-empty URL. Nil when there are none.
	static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

		guard let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else {
			return nil
		}

		let feedLinkTags = alternateLinkTags.filter { tag in
			guard let attributes = tag.attributes,
				  let type = attributes.object(forCaseInsensitiveKey: "type"),
				  typeIsFeedType(type) else {
				return false
			}
			guard let link = urlString(from: attributes) else {
				return false
			}
			return !link.isEmpty
		}

		return feedLinkTags.isEmpty ? nil : feedLinkTags
	}

	/// Returns true if `type` is a MIME type used for feeds.
	///
	/// Matching is case-insensitive and ignores surrounding whitespace and
	/// any MIME parameters (for example "; charset=utf-8"). Recognized
	/// suffixes are RSS, Atom, plain JSON, and "/feed+json" — the type the
	/// JSON Feed specification recommends for discovery links.
	static func typeIsFeedType(_ type: String) -> Bool {

		// Keep only the part before any ";" parameter list.
		let essence = String(type.prefix { $0 != ";" })
			.trimmingCharacters(in: .whitespacesAndNewlines)
			.lowercased()

		let feedTypeSuffixes = ["/rss+xml", "/atom+xml", "/json", "/feed+json"]
		return feedTypeSuffixes.contains { essence.hasSuffix($0) }
	}

	/// Returns the tags whose type is `.link`. Nil when there are none.
	static func linkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {

		let onlyLinkTags = tags.filter { $0.tagType == .link }
		return onlyLinkTags.isEmpty ? nil : onlyLinkTags
	}

	/// Returns link tags that have a non-empty URL and whose rel attribute
	/// contains `valueToMatch`.
	///
	/// Case-insensitive; matches a whitespace-delimited word.
	static func linkTagsWithMatchingRel(_ valueToMatch: String, _ tags: [HTMLTag]) -> [HTMLTag]? {

		guard let allLinkTags = linkTags(tags) else {
			return nil
		}

		let tagsWithURLString = allLinkTags.filter { tag in
			guard let attributes = tag.attributes, let link = urlString(from: attributes) else {
				return false
			}
			return !link.isEmpty
		}
		guard !tagsWithURLString.isEmpty else {
			return nil
		}

		return tagsMatchingRelValues([valueToMatch], tagsWithURLString)
	}

	/// Returns the tags whose rel attribute contains, as a
	/// whitespace-delimited word, any of `valuesToMatch` (case-insensitive).
	/// Nil when there are none.
	static func tagsMatchingRelValues(_ valuesToMatch: [String], _ tags: [HTMLTag]) -> [HTMLTag]? {

		let lowerValuesToMatch = Set(valuesToMatch.map { $0.lowercased() })

		let matchingTags = tags.filter { tag in
			guard let attributes = tag.attributes, let rel = relValue(from: attributes) else {
				return false
			}
			return rel.components(separatedBy: .whitespacesAndNewlines).contains { oneRelValue in
				lowerValuesToMatch.contains(oneRelValue.lowercased())
			}
		}

		return matchingTags.isEmpty ? nil : matchingTags
	}
}
|
||||
|
||||
/// An apple-touch-icon link tag: its rel, declared sizes, and absolute URL.
public final class HTMLMetadataAppleTouchIcon {

	public let rel: String?

	/// Raw value of the `sizes` attribute.
	public let sizes: String?

	/// `sizes` parsed as a single WIDTHxHEIGHT pair; nil when it doesn't have that form.
	public let size: CGSize?

	public let urlString: String? // Absolute

	/// - Parameters:
	///   - urlString: Base URL string used to resolve the tag's link.
	///   - tag: The link tag to read attributes from.
	init(_ urlString: String, _ tag: HTMLTag) {

		guard let attributes = tag.attributes else {
			self.rel = nil
			self.sizes = nil
			self.size = nil
			self.urlString = nil
			return
		}

		self.rel = attributes.object(forCaseInsensitiveKey: "rel")
		self.urlString = absoluteURLString(from: attributes, baseURL: urlString)

		let sizesValue = attributes.object(forCaseInsensitiveKey: "sizes")
		self.sizes = sizesValue
		self.size = sizesValue.flatMap(Self.size(fromSizesValue:))
	}

	/// Parses "WIDTHxHEIGHT" into a size.
	///
	/// The separator may be "x" or "X" — HTML permits either in the `sizes`
	/// attribute. Anything else (including "any" or a list of several sizes)
	/// yields nil.
	private static func size(fromSizesValue sizesValue: String) -> CGSize? {

		let components = sizesValue.components(separatedBy: CharacterSet(charactersIn: "xX"))
		guard components.count == 2, let width = Double(components[0]), let height = Double(components[1]) else {
			return nil
		}
		return CGSize(width: width, height: height)
	}
}
|
||||
|
||||
/// A feed link tag: its title, MIME type, and absolute URL.
public final class HTMLMetadataFeedLink {

	public let title: String?
	public let type: String?
	public let urlString: String? // Absolute

	/// - Parameters:
	///   - urlString: Base URL string used to resolve the tag's link.
	///   - tag: The link tag to read attributes from. All properties are nil
	///     when the tag has no attributes.
	init(_ urlString: String, _ tag: HTMLTag) {

		if let attributes = tag.attributes {
			self.title = attributes.object(forCaseInsensitiveKey: "title")
			self.type = attributes.object(forCaseInsensitiveKey: "type")
			self.urlString = absoluteURLString(from: attributes, baseURL: urlString)
		} else {
			self.title = nil
			self.type = nil
			self.urlString = nil
		}
	}
}
|
||||
|
||||
/// A favicon link tag: its MIME type and resolved URL.
public final class HTMLMetadataFavicon {

	public let type: String?
	public let urlString: String?

	/// - Parameters:
	///   - urlString: Base URL string used to resolve the tag's link.
	///   - tag: The link tag to read attributes from. Both properties are nil
	///     when the tag has no attributes.
	init(_ urlString: String, _ tag: HTMLTag) {

		if let attributes = tag.attributes {
			self.type = attributes.object(forCaseInsensitiveKey: "type")
			self.urlString = absoluteURLString(from: attributes, baseURL: urlString)
		} else {
			self.type = nil
			self.urlString = nil
		}
	}
}
|
||||
|
||||
/// Open Graph properties found in a page's meta tags.
public final class HTMLOpenGraphProperties {

	// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image.
	// See http://ogp.me/

	public let image: HTMLOpenGraphImage?

	/// `urlString` is accepted but not used; only `tags` are examined.
	init(_ urlString: String, _ tags: [HTMLTag]) {
		self.image = Self.parse(tags)
	}
}

private extension HTMLOpenGraphProperties {

	private static let ogPrefix = "og:"

	struct OGKey {
		static let property = "property"
		static let content = "content"
	}

	struct OGValue {
		static let ogImage = "og:image"
		static let ogImageURL = "og:image:url"
		static let ogImageSecureURL = "og:image:secure_url"
		static let ogImageType = "og:image:type"
		static let ogImageAlt = "og:image:alt"
		static let ogImageWidth = "og:image:width"
		static let ogImageHeight = "og:image:height"
	}

	/// Builds one image from the og:image* meta tags.
	///
	/// Tags are visited in order and a later tag overwrites the value set by
	/// an earlier one for the same field. Returns nil when there are no meta
	/// tags or when no image field was found.
	static func parse(_ tags: [HTMLTag]) -> HTMLOpenGraphImage? {

		let metaTags = tags.filter { $0.tagType == .meta }
		guard !metaTags.isEmpty else {
			return nil
		}

		// HTMLOpenGraphImage properties to fill in.
		var url: String?
		var secureURL: String?
		var mimeType: String?
		var width: CGFloat?
		var height: CGFloat?
		var altText: String?

		for tag in metaTags {

			guard let attributes = tag.attributes,
				  let propertyName = attributes[OGKey.property], propertyName.hasPrefix(ogPrefix),
				  let content = attributes[OGKey.content] else {
				continue
			}

			switch propertyName {
			case OGValue.ogImage, OGValue.ogImageURL:
				url = content
			case OGValue.ogImageSecureURL:
				secureURL = content
			case OGValue.ogImageType:
				mimeType = content
			case OGValue.ogImageAlt:
				altText = content
			case OGValue.ogImageWidth:
				// Non-numeric content leaves any earlier width untouched.
				if let value = Double(content) {
					width = CGFloat(value)
				}
			case OGValue.ogImageHeight:
				if let value = Double(content) {
					height = CGFloat(value)
				}
			default:
				break
			}
		}

		let foundNothing = url == nil && secureURL == nil && mimeType == nil && width == nil && height == nil && altText == nil
		if foundNothing {
			return nil
		}

		return HTMLOpenGraphImage(url: url, secureURL: secureURL, mimeType: mimeType, width: width, height: height, altText: altText)
	}
}
|
||||
|
||||
/// The og:image fields of a page. Each field is nil when the page didn't supply it.
public final class HTMLOpenGraphImage {

	public let url: String?
	public let secureURL: String?
	public let mimeType: String?
	public let width: CGFloat?
	public let height: CGFloat?
	public let altText: String?

	init(url: String?, secureURL: String?, mimeType: String?, width: CGFloat?, height: CGFloat?, altText: String?) {
		self.altText = altText
		self.height = height
		self.width = width
		self.mimeType = mimeType
		self.secureURL = secureURL
		self.url = url
	}
}
|
||||
|
||||
/// Twitter card properties found in a page's meta tags.
public final class HTMLTwitterProperties {

	public let imageURL: String? // twitter:image:src

	private struct TwitterKey {
		static let name = "name"
		static let content = "content"
	}

	private struct TwitterValue {
		static let imageSrc = "twitter:image:src"
	}

	/// Takes the content of the first meta tag named "twitter:image:src" that
	/// has non-empty content. `urlString` is accepted but not used.
	init(_ urlString: String, _ tags: [HTMLTag]) {

		var firstImageURL: String?

		for tag in tags where tag.tagType == .meta {
			let attributes = tag.attributes
			if attributes?[TwitterKey.name] == TwitterValue.imageSrc,
			   let content = attributes?[TwitterKey.content], !content.isEmpty {
				firstImageURL = content
				break
			}
		}

		self.imageURL = firstImageURL
	}
}
|
||||
|
||||
/// Returns the `href` attribute, falling back to `src` when there is no href.
private func urlString(from attributes: HTMLTagAttributes) -> String? {
	attributes.object(forCaseInsensitiveKey: "href") ?? attributes.object(forCaseInsensitiveKey: "src")
}

/// Returns the `rel` attribute, if any.
private func relValue(from attributes: HTMLTagAttributes) -> String? {
	attributes.object(forCaseInsensitiveKey: "rel")
}

/// Returns the tag's href/src resolved against `baseURL`, or nil when the
/// tag has no non-empty link or resolution fails.
private func absoluteURLString(from attributes: HTMLTagAttributes, baseURL: String) -> String? {

	guard let relativeLink = urlString(from: attributes), !relativeLink.isEmpty else {
		return nil
	}
	return absoluteURLStringWithRelativeURLString(relativeLink, baseURLString: baseURL)
}

/// Resolves `relativeURLString` against `baseURLString` and returns the
/// standardized absolute string. Nil when either string can't be made into a URL.
private func absoluteURLStringWithRelativeURLString(_ relativeURLString: String, baseURLString: String) -> String? {

	guard let baseURL = URL(string: baseURLString),
		  let resolvedURL = URL(string: relativeURLString, relativeTo: baseURL) else {
		return nil
	}
	return resolvedURL.absoluteURL.standardized.absoluteString
}
|
||||
|
@ -0,0 +1,102 @@
|
||||
//
|
||||
// HTMLMetadataParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 9/22/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
|
||||
/// Gathers the `<link>` and `<meta>` tags of an HTML page and builds an
/// `HTMLMetadata` from them.
public final class HTMLMetadataParser {

	private var tags: [HTMLTag] = []

	/// Parses `parserData` and returns the metadata found in it.
	public static func metadata(with parserData: ParserData) -> HTMLMetadata {
		let metadataParser = HTMLMetadataParser()
		return metadataParser.parse(parserData)
	}
}

private extension HTMLMetadataParser {

	/// Resets the collected tags, runs the SAX HTML parser, and wraps the result.
	func parse(_ parserData: ParserData) -> HTMLMetadata {

		tags = []
		SAXHTMLParser(delegate: self, data: parserData.data).parse()

		return HTMLMetadata(parserData.url, tags)
	}
}

extension HTMLMetadataParser: SAXHTMLParserDelegate {

	private struct HTMLName {
		static let link = "link".utf8CString
		static let meta = "meta".utf8CString
	}

	private struct HTMLKey {
		static let href = "href"
		static let src = "src"
		static let rel = "rel"
	}

	/// Returns the `href` attribute, falling back to `src`.
	private func link(with attributes: StringDictionary) -> String? {
		attributes.object(forCaseInsensitiveKey: HTMLKey.href) ?? attributes.object(forCaseInsensitiveKey: HTMLKey.src)
	}

	/// Records a link tag, but only when it has both a non-empty rel and a non-empty link.
	private func handleLinkAttributes(_ attributes: StringDictionary) {

		guard let rel = attributes.object(forCaseInsensitiveKey: HTMLKey.rel), !rel.isEmpty,
			  let link = link(with: attributes), !link.isEmpty else {
			return
		}

		tags.append(HTMLTag(tagType: .link, attributes: attributes))
	}

	/// Records a meta tag unconditionally.
	private func handleMetaAttributes(_ attributes: StringDictionary) {
		tags.append(HTMLTag(tagType: .meta, attributes: attributes))
	}

	/// Dispatches `<link>` and `<meta>` start tags that carry at least one
	/// attribute; every other element is ignored.
	public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer<XMLPointer?>?) {

		let isLink = SAXEqualTags(name, HTMLName.link)
		guard isLink || SAXEqualTags(name, HTMLName.meta) else {
			return
		}
		guard let attributesDictionary = saxHTMLParser.attributesDictionary(attributes), !attributesDictionary.isEmpty else {
			return
		}

		if isLink {
			handleLinkAttributes(attributesDictionary)
		} else {
			handleMetaAttributes(attributesDictionary)
		}
	}

	public func saxHTMLParser(_: SAXHTMLParser, endElement: XMLPointer) {
		// Nothing to do
	}

	public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) {
		// Nothing to do
	}
}
|
26
Modules/Parser/Sources/Parser/HTMLParser/HTMLTag.swift
Normal file
26
Modules/Parser/Sources/Parser/HTMLParser/HTMLTag.swift
Normal file
@ -0,0 +1,26 @@
|
||||
//
|
||||
// HTMLTag.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Attribute name → attribute value for one HTML tag.
public typealias HTMLTagAttributes = [String: String]

/// A tag paired with its attributes.
public struct HTMLTag: Sendable {

	/// The kinds of tag this type represents.
	public enum TagType: Sendable {
		case link, meta
	}

	public let tagType: TagType
	public let attributes: HTMLTagAttributes?

	public init(tagType: TagType, attributes: HTMLTagAttributes?) {
		self.attributes = attributes
		self.tagType = tagType
	}
}
|
@ -0,0 +1,53 @@
|
||||
//
|
||||
// OPMLAttributes.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
// OPML allows for arbitrary attributes.
// These are the common attributes in OPML files used as RSS subscription lists.

private enum OPMLAttributeKey {
	static let text = "text"
	static let title = "title"
	static let description = "description"
	static let type = "type"
	static let version = "version"
	static let htmlURL = "htmlUrl"
	static let xmlURL = "xmlUrl"
}

// A frequent error in OPML files is to mess up the capitalization,
// so these do a case-insensitive lookup.

extension Dictionary where Key == String, Value == String {

	/// Value of the "text" attribute.
	var opml_text: String? { object(forCaseInsensitiveKey: OPMLAttributeKey.text) }

	/// Value of the "title" attribute.
	var opml_title: String? { object(forCaseInsensitiveKey: OPMLAttributeKey.title) }

	/// Value of the "description" attribute.
	var opml_description: String? { object(forCaseInsensitiveKey: OPMLAttributeKey.description) }

	/// Value of the "type" attribute.
	var opml_type: String? { object(forCaseInsensitiveKey: OPMLAttributeKey.type) }

	/// Value of the "version" attribute.
	var opml_version: String? { object(forCaseInsensitiveKey: OPMLAttributeKey.version) }

	/// Value of the "htmlUrl" attribute.
	var opml_htmlUrl: String? { object(forCaseInsensitiveKey: OPMLAttributeKey.htmlURL) }

	/// Value of the "xmlUrl" attribute.
	var opml_xmlUrl: String? { object(forCaseInsensitiveKey: OPMLAttributeKey.xmlURL) }
}
|
19
Modules/Parser/Sources/Parser/OPMLParser/OPMLDocument.swift
Normal file
19
Modules/Parser/Sources/Parser/OPMLParser/OPMLDocument.swift
Normal file
@ -0,0 +1,19 @@
|
||||
//
|
||||
// OPMLDocument.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// The root item of a parsed OPML file. It has no attributes of its own.
public final class OPMLDocument: OPMLItem {

	/// Document title; nil until something assigns it.
	public var title: String?

	/// The URL string this document was created with.
	public var url: String?

	init(url: String?) {
		self.url = url
		super.init(attributes: nil)
	}
}
|
@ -0,0 +1,40 @@
|
||||
//
|
||||
// OPMLFeedSpecifier.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Describes one feed listed in an OPML file.
public struct OPMLFeedSpecifier: Sendable {

	public let title: String?
	public let feedDescription: String?
	public let homePageURL: String?
	public let feedURL: String

	/// Empty strings passed for the optional parameters are stored as nil.
	/// `feedURL` is stored as given.
	init(title: String?, feedDescription: String?, homePageURL: String?, feedURL: String) {

		self.title = String.isEmptyOrNil(title) ? nil : title
		self.feedDescription = String.isEmptyOrNil(feedDescription) ? nil : feedDescription
		self.homePageURL = String.isEmptyOrNil(homePageURL) ? nil : homePageURL
		self.feedURL = feedURL
	}
}
|
||||
|
42
Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift
Normal file
42
Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift
Normal file
@ -0,0 +1,42 @@
|
||||
//
|
||||
// OPMLItem.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import os
|
||||
|
||||
/// One item in an OPML tree, with its attributes and child items.
public class OPMLItem {

	/// Non-nil when the attributes include an xmlUrl.
	public let feedSpecifier: OPMLFeedSpecifier?

	public let attributes: [String: String]?

	/// The "title" attribute, falling back to "text".
	public let titleFromAttributes: String?

	/// Child items; nil until the first child is added.
	public var items: [OPMLItem]?

	/// True when this item has at least one child.
	public var isFolder: Bool {
		guard let items else {
			return false
		}
		return !items.isEmpty
	}

	init(attributes: [String: String]?) {

		let title = attributes?.opml_title ?? attributes?.opml_text
		self.titleFromAttributes = title
		self.attributes = attributes

		guard let feedURL = attributes?.opml_xmlUrl else {
			self.feedSpecifier = nil
			return
		}
		self.feedSpecifier = OPMLFeedSpecifier(title: title, feedDescription: attributes?.opml_description, homePageURL: attributes?.opml_htmlUrl, feedURL: feedURL)
	}

	/// Appends `item` to `items`, creating the array on first use.
	public func add(_ item: OPMLItem) {

		guard items != nil else {
			items = [item]
			return
		}
		items?.append(item)
	}
}
|
117
Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift
Normal file
117
Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift
Normal file
@ -0,0 +1,117 @@
|
||||
//
|
||||
// OPMLParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Parses OPML data into an `OPMLDocument` tree of `OPMLItem`s.
public final class OPMLParser {

	private let parserData: ParserData
	private var data: Data {
		parserData.data
	}

	private var opmlDocument: OPMLDocument?

	/// Stack of items currently open; the document sits at the bottom.
	private var itemStack = [OPMLItem]()
	private var currentItem: OPMLItem? {
		itemStack.last
	}

	/// Returns nil if data can’t be parsed (if it’s not OPML).
	public static func document(with parserData: ParserData) -> OPMLDocument? {

		let opmlParser = OPMLParser(parserData)
		opmlParser.parse()
		return opmlParser.opmlDocument
	}

	init(_ parserData: ParserData) {
		self.parserData = parserData
	}
}

private extension OPMLParser {

	/// Creates the document, pushes it as the root of the item stack, and
	/// runs the SAX parser. Does nothing when the data doesn't look like OPML.
	func parse() {

		guard canParseData() else {
			return
		}

		// Bind to a local so the optional property never needs force-unwrapping.
		let document = OPMLDocument(url: parserData.url)
		opmlDocument = document
		push(document)

		let saxParser = SAXParser(delegate: self, data: data)
		saxParser.parse()
	}

	/// True when the data contains the bytes "<opml".
	func canParseData() -> Bool {
		data.containsASCIIString("<opml")
	}

	func push(_ item: OPMLItem) {
		itemStack.append(item)
	}

	func popItem() {

		guard itemStack.count > 0 else {
			assertionFailure("itemStack.count must be > 0")
			return
		}

		itemStack.removeLast()
	}
}

extension OPMLParser: SAXParserDelegate {

	private struct XMLName {
		static let title = "title".utf8CString
		static let outline = "outline".utf8CString
	}

	/// `<title>`: begin storing characters. `<outline>`: create an item from
	/// the attributes, add it to the current item, and make it current.
	public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {

		if SAXEqualTags(localName, XMLName.title) {
			saxParser.beginStoringCharacters()
			return
		}

		guard SAXEqualTags(localName, XMLName.outline) else {
			return
		}

		let attributesDictionary = saxParser.attributesDictionary(attributes, attributeCount: attributeCount)
		let item = OPMLItem(attributes: attributesDictionary)

		currentItem?.add(item)
		push(item)
	}

	/// `</title>`: if the current item is the document, set its title from the
	/// stored characters; then stop storing. `</outline>`: pop the current item.
	public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {

		if SAXEqualTags(localName, XMLName.title) {
			if let item = currentItem as? OPMLDocument {
				item.title = saxParser.currentStringWithTrimmedWhitespace
			}
			saxParser.endStoringCharacters()
			return
		}

		if SAXEqualTags(localName, XMLName.outline) {
			popItem()
		}
	}

	public func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {
		// Nothing to do, but method is required.
	}
}
|
@ -1,11 +0,0 @@
|
||||
//
|
||||
// File.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 4/7/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import ParserObjC
|
||||
|
||||
extension ParserData: @unchecked Sendable {}
|
@ -1,11 +0,0 @@
|
||||
//
|
||||
// File.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 4/7/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import ParserObjC
|
||||
|
||||
extension RSHTMLMetadataParser: @unchecked Sendable {}
|
68
Modules/Parser/Sources/Parser/SAX/Extensions/Data+SAX.swift
Normal file
68
Modules/Parser/Sources/Parser/SAX/Extensions/Data+SAX.swift
Normal file
@ -0,0 +1,68 @@
|
||||
//
|
||||
// Data+Parser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/24/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public extension Data {

	/// Return true if the data contains a given String.
	///
	/// Assumes that the data is UTF-8 or similar encoding —
	/// if it’s UTF-16 or UTF-32, for instance, this will always return false.
	/// Luckily these are rare.
	///
	/// The String to search for should be something that could be encoded
	/// in ASCII — like "<opml" or "<rss". (In other words,
	/// the sequence of characters would always be the same in
	/// commonly-used encodings.)
	func containsASCIIString(_ searchFor: String) -> Bool {
		contains(searchFor.utf8)
	}

	/// Return true if searchFor appears in self.
	///
	/// Returns false when `searchFor` is empty or longer than self.
	/// Safe for Data slices on either side: nothing here assumes that a
	/// Data value's indices start at 0.
	func contains(_ searchFor: Data) -> Bool {

		guard !searchFor.isEmpty, searchFor.count <= count else {
			return false
		}

		// Foundation's own search handles slice indices correctly.
		return range(of: searchFor) != nil
	}
}
|
@ -0,0 +1,28 @@
|
||||
//
|
||||
// Dictionary+Parser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
public extension Dictionary where Key == String, Value == String {

	/// Returns the value for `key`, ignoring case.
	///
	/// An exact-case match is tried first. Otherwise the first entry whose
	/// key compares equal case-insensitively is returned, or nil when there
	/// is none.
	func object(forCaseInsensitiveKey key: String) -> String? {

		if let exactMatch = self[key] {
			return exactMatch
		}

		let lowercaseKey = key.lowercased()
		let match = first(where: { entry in
			lowercaseKey.caseInsensitiveCompare(entry.key) == .orderedSame
		})
		return match?.value
	}
}
|
@ -8,10 +8,16 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
extension String {
|
||||
public extension String {

	/// The string itself (untrimmed), or nil when it is empty or contains
	/// only whitespace and newlines.
	var nilIfEmptyOrWhitespace: String? {
		let trimmed = trimmingCharacters(in: .whitespacesAndNewlines)
		return trimmed.isEmpty ? nil : self
	}

	/// True when `s` is nil or the empty string.
	static func isEmptyOrNil(_ s: String?) -> Bool {
		s?.isEmpty ?? true
	}
}
|
19
Modules/Parser/Sources/Parser/SAX/ParserData.swift
Normal file
19
Modules/Parser/Sources/Parser/SAX/ParserData.swift
Normal file
@ -0,0 +1,19 @@
|
||||
//
|
||||
// ParserData.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A URL string paired with a blob of data.
public struct ParserData: Sendable {

    /// The URL string associated with `data`.
    public let url: String

    /// The raw bytes.
    public let data: Data

    /// Creates a value holding `url` and `data` unchanged.
    public init(url: String, data: Data) {
        self.data = data
        self.url = url
    }
}
|
200
Modules/Parser/Sources/Parser/SAX/SAXHTMLParser.swift
Normal file
200
Modules/Parser/Sources/Parser/SAX/SAXHTMLParser.swift
Normal file
@ -0,0 +1,200 @@
|
||||
//
|
||||
// SAXHTMLParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/26/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
import libxml2
|
||||
|
||||
/// Receives element and character events from a `SAXHTMLParser`.
public protocol SAXHTMLParserDelegate: AnyObject {

    /// An element start was found. `attributes` can be converted with
    /// `SAXHTMLParser.attributesDictionary(_:)`.
    func saxHTMLParser(_ parser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer<XMLPointer?>?)

    /// An element end was found.
    func saxHTMLParser(_ parser: SAXHTMLParser, endElement name: XMLPointer)

    /// Character data was found.
    // Length is guaranteed to be greater than 0.
    func saxHTMLParser(_ parser: SAXHTMLParser, charactersFound characters: XMLPointer, count: Int)
}
|
||||
|
||||
/// Feeds HTML data to libxml2's push parser and forwards the resulting
/// SAX events to a `SAXHTMLParserDelegate`.
public final class SAXHTMLParser {

    fileprivate let delegate: SAXHTMLParserDelegate

    /// Bytes collected since `beginStoringCharacters()`; `nil` when not storing.
    public var currentCharacters: Data? { // UTF-8 encoded
        storingCharacters ? characters : nil
    }

    // Conveniences to get string version of currentCharacters

    /// `currentCharacters` decoded as UTF-8; `nil` when there are no stored bytes.
    public var currentString: String? {
        if let storedBytes = currentCharacters, !storedBytes.isEmpty {
            return String(data: storedBytes, encoding: .utf8)
        }
        return nil
    }

    /// `currentString` with leading and trailing whitespace and newlines removed.
    public var currentStringWithTrimmedWhitespace: String? {
        currentString?.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
    }

    private var data: Data
    private var storingCharacters = false
    private var characters = Data()

    /// - Parameters:
    ///   - delegate: Receives the parse events.
    ///   - data: The HTML bytes to parse.
    public init(delegate: SAXHTMLParserDelegate, data: Data) {
        self.data = data
        self.delegate = delegate
    }

    /// Parses the data in one pass, calling the delegate as events occur.
    /// Does nothing when the data is empty.
    public func parse() {

        if data.isEmpty {
            return
        }

        data.withUnsafeBytes { rawBuffer in

            guard let bytes = rawBuffer.bindMemory(to: CChar.self).baseAddress else {
                return
            }

            let byteCount = Int32(data.count)
            let characterEncoding = xmlDetectCharEncoding(bytes, byteCount)

            // The parser itself is the user-data pointer handed back to the SAX callbacks.
            let userData = Unmanaged.passUnretained(self).toOpaque()
            let context = htmlCreatePushParserCtxt(&saxHandlerStruct, userData, nil, 0, nil, characterEncoding)

            let options = HTML_PARSE_RECOVER.rawValue
                | HTML_PARSE_NONET.rawValue
                | HTML_PARSE_COMPACT.rawValue
                | HTML_PARSE_NOERROR.rawValue
                | HTML_PARSE_NOWARNING.rawValue
            htmlCtxtUseOptions(context, Int32(options))

            // Feed everything in a single chunk, then signal termination.
            htmlParseChunk(context, bytes, byteCount, 0)
            htmlParseChunk(context, nil, 0, 1)

            htmlFreeParserCtxt(context)
        }
    }

    /// Delegate can call from startElement. Characters will be available in endElement as currentCharacters property. Storing characters is stopped after each endElement.
    public func beginStoringCharacters() {
        characters.count = 0
        storingCharacters = true
    }

    /// Stops collecting characters and discards anything collected so far.
    public func endStoringCharacters() {
        characters.count = 0
        storingCharacters = false
    }

    /// Converts the attributes array passed to the delegate's startElement
    /// call into a dictionary.
    ///
    /// The array is read as alternating name and value pointers, ending at
    /// the first nil name. A nil value is stored as the empty string.
    ///
    /// - Returns: `nil` when `attributes` is nil, otherwise the dictionary
    ///   (possibly empty).
    public func attributesDictionary(_ attributes: UnsafePointer<XMLPointer?>?) -> StringDictionary? {

        guard let attributes else {
            return nil
        }

        var result = [String: String]()
        var index = 0

        while let namePointer = attributes[index] {
            let name = String(cString: namePointer)

            var value = ""
            if let valuePointer = attributes[index + 1] {
                value = String(cString: valuePointer)
            }

            result[name] = value
            index += 2
        }

        return result
    }
}
|
||||
|
||||
// Entry points used by the libxml2 callbacks in this file.
private extension SAXHTMLParser {

    /// Appends to the stored characters (when storing), then notifies the delegate.
    func charactersFound(_ bytes: XMLPointer, count: Int) {

        defer {
            delegate.saxHTMLParser(self, charactersFound: bytes, count: count)
        }

        guard storingCharacters else {
            return
        }
        characters.append(bytes, count: count)
    }

    /// Forwards an element start to the delegate.
    func startElement(_ elementName: XMLPointer, attributes: UnsafePointer<XMLPointer?>?) {
        delegate.saxHTMLParser(self, startElement: elementName, attributes: attributes)
    }

    /// Forwards an element end to the delegate, then stops storing characters.
    func endElement(_ elementName: XMLPointer) {
        defer {
            endStoringCharacters()
        }
        delegate.saxHTMLParser(self, endElement: elementName)
    }
}
|
||||
|
||||
/// Recovers the `SAXHTMLParser` from the opaque pointer passed to a libxml2
/// callback, without changing its retain count.
private func parser(from context: UnsafeMutableRawPointer) -> SAXHTMLParser {
    let unmanagedParser = Unmanaged<SAXHTMLParser>.fromOpaque(context)
    return unmanagedParser.takeUnretainedValue()
}
|
||||
|
||||
/// The libxml2 handler table for HTML parsing: routes the characters,
/// startElement and endElement callbacks to the `SAXHTMLParser` stored in
/// the callback's context pointer. Callbacks with a nil context or name
/// (or a non-positive character count) are ignored.
nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = {

    var sax = htmlSAXHandler()

    sax.characters = { (context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) in
        if let context, let ch, len > 0 {
            parser(from: context).charactersFound(ch, count: Int(len))
        }
    }

    sax.startElement = { (context: UnsafeMutableRawPointer?, name: XMLPointer?, attributes: UnsafeMutablePointer<XMLPointer?>?) in
        if let context, let name {
            parser(from: context).startElement(name, attributes: attributes)
        }
    }

    sax.endElement = { (context: UnsafeMutableRawPointer?, name: XMLPointer?) in
        if let context, let name {
            parser(from: context).endElement(name)
        }
    }

    return sax
}()
|
204
Modules/Parser/Sources/Parser/SAX/SAXParser.swift
Normal file
204
Modules/Parser/Sources/Parser/SAX/SAXParser.swift
Normal file
@ -0,0 +1,204 @@
|
||||
//
|
||||
// SAXParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/12/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import FoundationExtras
|
||||
import libxml2
|
||||
|
||||
/// Pointer to libxml2 `xmlChar` bytes, as passed to the SAX callbacks and delegate methods.
public typealias XMLPointer = UnsafePointer<xmlChar>
|
||||
|
||||
/// Receives element and character events from a `SAXParser`.
public protocol SAXParserDelegate {

    /// An element start was found. `attributes` and `attributeCount` can be
    /// converted with `SAXParser.attributesDictionary(_:attributeCount:)`.
    func saxParser(_ parser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?)

    /// An element end was found.
    func saxParser(_ parser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?)

    /// Character data was found; `count` is the number of bytes at `xmlCharactersFound`.
    func saxParser(_ parser: SAXParser, xmlCharactersFound characters: XMLPointer, count: Int)
}
|
||||
|
||||
/// Feeds XML data to libxml2's push parser and forwards the resulting
/// SAX2 events to a `SAXParserDelegate`.
public final class SAXParser {

    fileprivate let delegate: SAXParserDelegate

    /// Bytes collected since `beginStoringCharacters()`; `nil` when not storing.
    public var currentCharacters: Data? { // UTF-8 encoded

        guard storingCharacters else {
            return nil
        }
        return characters
    }

    // Conveniences to get string version of currentCharacters

    /// `currentCharacters` decoded as UTF-8; `nil` when there are no stored bytes.
    public var currentString: String? {

        guard let d = currentCharacters, !d.isEmpty else {
            return nil
        }
        return String(data: d, encoding: .utf8)
    }

    /// `currentString` with leading and trailing whitespace and newlines removed.
    public var currentStringWithTrimmedWhitespace: String? {

        guard let s = currentString else {
            return nil
        }
        return s.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
    }

    private var data: Data
    private var storingCharacters = false
    private var characters = Data()

    /// - Parameters:
    ///   - delegate: Receives the parse events.
    ///   - data: The XML bytes to parse.
    public init(delegate: SAXParserDelegate, data: Data) {

        self.delegate = delegate
        self.data = data
    }

    /// Parses the data in one pass, calling the delegate as events occur.
    /// Does nothing when the data is empty or a parser context can't be created.
    public func parse() {

        guard !data.isEmpty else {
            return
        }

        // The parser itself is the user-data pointer handed back to the SAX callbacks.
        guard let context = xmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil) else {
            return
        }
        defer {
            xmlFreeParserCtxt(context)
        }

        xmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NOENT.rawValue))

        data.withUnsafeBytes { bufferPointer in
            if let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress {
                xmlParseChunk(context, bytes, Int32(data.count), 0)
            }
        }

        // Signal termination so libxml2 finishes the document.
        xmlParseChunk(context, nil, 0, 1)
    }

    /// Delegate can call from xmlStartElement. Characters will be available in xmlEndElement as currentCharacters property. Storing characters is stopped after each xmlEndElement.
    public func beginStoringCharacters() {

        storingCharacters = true
        characters.count = 0
    }

    /// Stops collecting characters and discards anything collected so far.
    public func endStoringCharacters() {

        storingCharacters = false
        characters.count = 0
    }

    /// Converts the attributes array passed to the delegate's xmlStartElement
    /// call into a dictionary keyed by attribute name (`prefix:name` when the
    /// attribute has a prefix).
    ///
    /// Each attribute occupies five consecutive pointers. Attributes whose
    /// name or value pointers are nil, or whose value is not valid UTF-8,
    /// are skipped.
    ///
    /// - Parameters:
    ///   - attributes: The attributes array from the start-element callback.
    ///   - attributeCount: The number of attributes in the array.
    /// - Returns: `nil` when there are no attributes, otherwise the dictionary.
    public func attributesDictionary(_ attributes: UnsafePointer<XMLPointer?>?, attributeCount: Int) -> StringDictionary? {

        guard attributeCount > 0, let attributes else {
            return nil
        }

        var dictionary = [String: String]()

        // Slots per attribute: name, prefix, (unused here), value start, value end.
        let fieldCount = 5

        // Iterating by index guarantees progress even when an attribute is skipped.
        for attributeIndex in 0..<attributeCount {

            let base = attributeIndex * fieldCount

            guard let attribute = attributes[base] else {
                continue
            }
            guard let valueStart = attributes[base + 3], let valueEnd = attributes[base + 4] else {
                continue
            }

            var attributeName = String(cString: attribute)
            if let prefix = attributes[base + 1] {
                let attributePrefix = String(cString: prefix)
                attributeName = "\(attributePrefix):\(attributeName)"
            }

            // The value is delimited by start/end pointers rather than a terminator.
            let valueCount = valueEnd - valueStart
            let valueBytes = UnsafeRawBufferPointer(start: valueStart, count: valueCount)

            if let value = String(bytes: valueBytes, encoding: .utf8) {
                dictionary[attributeName] = value
            }
        }

        return dictionary
    }
}
|
||||
|
||||
// Entry points used by the libxml2 callbacks in this file.
private extension SAXParser {

    /// Appends to the stored characters (when storing), then notifies the delegate.
    func charactersFound(_ bytes: XMLPointer, count: Int) {

        defer {
            delegate.saxParser(self, xmlCharactersFound: bytes, count: count)
        }

        guard storingCharacters else {
            return
        }
        characters.append(bytes, count: count)
    }

    /// Forwards an element start to the delegate.
    func startElement(_ localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
        delegate.saxParser(
            self,
            xmlStartElement: localName,
            prefix: prefix,
            uri: uri,
            namespaceCount: namespaceCount,
            namespaces: namespaces,
            attributeCount: attributeCount,
            attributesDefaultedCount: attributesDefaultedCount,
            attributes: attributes
        )
    }

    /// Forwards an element end to the delegate, then stops storing characters.
    func endElement(_ localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {
        defer {
            endStoringCharacters()
        }
        delegate.saxParser(self, xmlEndElement: localName, prefix: prefix, uri: uri)
    }
}
|
||||
|
||||
/// libxml2 start-element callback: forwards to the `SAXParser` stored in
/// `context`. Ignored when `context` or `name` is nil.
private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer<XMLPointer?>?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer<XMLPointer?>?) {

    if let context, let name {
        parser(from: context).startElement(
            name,
            prefix: prefix,
            uri: URI,
            namespaceCount: Int(nb_namespaces),
            namespaces: namespaces,
            attributeCount: Int(nb_attributes),
            attributesDefaultedCount: Int(nb_defaulted),
            attributes: attributes
        )
    }
}
|
||||
|
||||
/// libxml2 end-element callback: forwards to the `SAXParser` stored in
/// `context`. Ignored when `context` or `name` is nil.
private func endElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?) {

    if let context, let name {
        parser(from: context).endElement(name, prefix: prefix, uri: URI)
    }
}
|
||||
|
||||
/// libxml2 characters callback: forwards to the `SAXParser` stored in
/// `context`. Ignored when `context` or `ch` is nil, or `len` is not positive.
private func charactersFound(_ context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) {

    if let context, let ch, len > 0 {
        parser(from: context).charactersFound(ch, count: Int(len))
    }
}
|
||||
|
||||
/// Recovers the `SAXParser` from the opaque pointer passed to a libxml2
/// callback, without changing its retain count.
private func parser(from context: UnsafeMutableRawPointer) -> SAXParser {
    let unmanagedParser = Unmanaged<SAXParser>.fromOpaque(context)
    return unmanagedParser.takeUnretainedValue()
}
|
||||
|
||||
/// The libxml2 handler table for XML parsing: wires the characters and
/// namespace-aware start/end element callbacks to the functions above.
nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = {

    var sax = xmlSAXHandler()

    sax.initialized = XML_SAX2_MAGIC
    sax.startElementNs = startElement
    sax.endElementNs = endElement
    sax.characters = charactersFound

    return sax
}()
|
||||
|
41
Modules/Parser/Sources/Parser/SAX/SAXUtilities.swift
Normal file
41
Modules/Parser/Sources/Parser/SAX/SAXUtilities.swift
Normal file
@ -0,0 +1,41 @@
|
||||
//
|
||||
// SAXUtilities.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/26/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import libxml2
|
||||
|
||||
/// Returns true when the 0-terminated string at `localName` is byte-for-byte
/// equal to `tag`.
///
/// - Parameters:
///   - localName: A 0-terminated name.
///   - tag: The bytes to compare against; its last element is treated as the
///     0 terminator and is not compared. An empty array never matches.
/// - Returns: `true` only when every byte matches and `localName` ends where `tag` does.
public func SAXEqualTags(_ localName: XMLPointer, _ tag: ContiguousArray<Int8>) -> Bool {

    return tag.withUnsafeBufferPointer { (tagBytes: UnsafeBufferPointer<Int8>) -> Bool in

        // Index of the terminator; an empty array has none, so nothing can match.
        guard let terminatorIndex = tagBytes.indices.last else {
            return false
        }

        for i in 0..<terminatorIndex {

            let localNameCharacter = localName[i]
            if localNameCharacter == 0 {
                return false
            }

            // bitPattern: bytes >= 0x80 are negative as Int8, and UInt8(_:) would trap on them.
            let tagCharacter = UInt8(bitPattern: tagBytes[i])
            if localNameCharacter != tagCharacter {
                return false
            }
        }

        // localName might actually be longer — make sure it’s the same length as tag.
        return localName[terminatorIndex] == 0
    }
}
|
||||
|
||||
public extension String {

    /// Creates a string by decoding bytes at `xmlPointer` as UTF-8.
    ///
    /// - Parameters:
    ///   - xmlPointer: Start of the bytes.
    ///   - count: Number of bytes to read. When nil, `strlen` is used, so the
    ///     bytes must be 0-terminated.
    /// - Returns: `nil` when the bytes are not valid UTF-8.
    init?(xmlPointer: XMLPointer, count: Int? = nil) {
        let byteCount: Int
        if let count {
            byteCount = count
        } else {
            byteCount = strlen(xmlPointer)
        }

        let bytes = Data(bytes: xmlPointer, count: byteCount)
        self.init(data: bytes, encoding: .utf8)
    }
}
|
@ -9,14 +9,14 @@
|
||||
import XCTest
|
||||
import Parser
|
||||
|
||||
class AtomParserTests: XCTestCase {
|
||||
final class AtomParserTests: XCTestCase {
|
||||
|
||||
func testDaringFireballPerformance() {
|
||||
|
||||
// 0.009 sec on my 2012 iMac.
|
||||
let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,22 +25,22 @@ class AtomParserTests: XCTestCase {
|
||||
// 0.003 sec on my 2012 iMac.
|
||||
let d = parserData("allthis", "atom", "http://leancrew.com/all-this")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
func testGettingHomePageLink() async {
|
||||
func testGettingHomePageLink() {
|
||||
|
||||
let d = parserData("allthis", "atom", "http://leancrew.com/all-this")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.homePageURL == "http://leancrew.com/all-this")
|
||||
}
|
||||
|
||||
func testDaringFireball() async {
|
||||
func testDaringFireball() {
|
||||
|
||||
let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
for article in parsedFeed.items {
|
||||
|
||||
@ -65,12 +65,12 @@ class AtomParserTests: XCTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
func test4fsodonlineAttachments() async {
|
||||
func test4fsodonlineAttachments() {
|
||||
|
||||
// Thanks to Marco for finding me some Atom podcast feeds. Apparently they’re super-rare.
|
||||
|
||||
let d = parserData("4fsodonline", "atom", "http://4fsodonline.blogspot.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
for article in parsedFeed.items {
|
||||
|
||||
@ -83,12 +83,12 @@ class AtomParserTests: XCTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
func testExpertOpinionENTAttachments() async {
|
||||
func testExpertOpinionENTAttachments() {
|
||||
|
||||
// Another from Marco.
|
||||
|
||||
let d = parserData("expertopinionent", "atom", "http://expertopinionent.typepad.com/my-blog/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
for article in parsedFeed.items {
|
||||
|
||||
|
139
Modules/Parser/Tests/ParserTests/DateParserTests.swift
Normal file
139
Modules/Parser/Tests/ParserTests/DateParserTests.swift
Normal file
@ -0,0 +1,139 @@
|
||||
//
|
||||
// DateParserTests.swift
|
||||
//
|
||||
//
|
||||
// Created by Maurice Parker on 4/1/21.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import XCTest
|
||||
@testable import Parser
|
||||
|
||||
/// Tests for `DateParser.date(data:)` across the date formats listed below.
final class DateParserTests: XCTestCase {

    /// Builds the expected `Date` from components interpreted as GMT.
    func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ millisecond: Int = 0) -> Date {
        var dateComponents = DateComponents()
        // Use a fixed Gregorian calendar: Calendar.current follows the machine's
        // settings, and a non-Gregorian calendar would interpret the year differently.
        dateComponents.calendar = Calendar(identifier: .gregorian)
        dateComponents.timeZone = TimeZone(secondsFromGMT: 0)

        dateComponents.year = year
        dateComponents.month = month
        dateComponents.day = day
        dateComponents.hour = hour
        dateComponents.minute = minute
        dateComponents.second = second
        dateComponents.nanosecond = millisecond * 1000000

        return dateComponents.date!
    }

    func testDateWithString() {
        var expectedDateResult = dateWithValues(2010, 5, 28, 21, 3, 38)

        var d = date("Fri, 28 May 2010 21:03:38 +0000")
        XCTAssertEqual(d, expectedDateResult)

        d = date("Fri, 28 May 2010 21:03:38 +00:00")
        XCTAssertEqual(d, expectedDateResult)

        d = date("Fri, 28 May 2010 21:03:38 -00:00")
        XCTAssertEqual(d, expectedDateResult)

        d = date("Fri, 28 May 2010 21:03:38 -0000")
        XCTAssertEqual(d, expectedDateResult)

        d = date("Fri, 28 May 2010 21:03:38 GMT")
        XCTAssertEqual(d, expectedDateResult)

        d = date("2010-05-28T21:03:38+00:00")
        XCTAssertEqual(d, expectedDateResult)

        d = date("2010-05-28T21:03:38+0000")
        XCTAssertEqual(d, expectedDateResult)

        d = date("2010-05-28T21:03:38-0000")
        XCTAssertEqual(d, expectedDateResult)

        d = date("2010-05-28T21:03:38-00:00")
        XCTAssertEqual(d, expectedDateResult)

        d = date("2010-05-28T21:03:38Z")
        XCTAssertEqual(d, expectedDateResult)

        expectedDateResult = dateWithValues(2010, 7, 13, 17, 6, 40)
        d = date("2010-07-13T17:06:40+00:00")
        XCTAssertEqual(d, expectedDateResult)

        expectedDateResult = dateWithValues(2010, 4, 30, 12, 0, 0)
        d = date("30 Apr 2010 5:00 PDT")
        XCTAssertEqual(d, expectedDateResult)

        expectedDateResult = dateWithValues(2010, 5, 21, 21, 22, 53)
        d = date("21 May 2010 21:22:53 GMT")
        XCTAssertEqual(d, expectedDateResult)

        expectedDateResult = dateWithValues(2010, 6, 9, 5, 0, 0)
        d = date("Wed, 09 Jun 2010 00:00 EST")
        XCTAssertEqual(d, expectedDateResult)

        expectedDateResult = dateWithValues(2010, 6, 23, 3, 43, 50)
        d = date("Wed, 23 Jun 2010 03:43:50 Z")
        XCTAssertEqual(d, expectedDateResult)

        expectedDateResult = dateWithValues(2010, 6, 22, 3, 57, 49)
        d = date("2010-06-22T03:57:49+00:00")
        XCTAssertEqual(d, expectedDateResult)

        expectedDateResult = dateWithValues(2010, 11, 17, 13, 40, 07)
        d = date("2010-11-17T08:40:07-05:00")
        XCTAssertEqual(d, expectedDateResult)
    }

    func testAtomDateWithMissingTCharacter() {
        let expectedDateResult = dateWithValues(2010, 11, 17, 13, 40, 07)
        let d = date("2010-11-17 08:40:07-05:00")
        XCTAssertEqual(d, expectedDateResult)
    }

    func testFeedbinDate() {
        let expectedDateResult = dateWithValues(2019, 9, 27, 21, 01, 48)
        let d = date("2019-09-27T21:01:48.000000Z")
        XCTAssertEqual(d, expectedDateResult)
    }

    func testMillisecondDate() {
        let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56, 516)
        let d = date("2021-03-29T10:46:56.516+00:00")
        XCTAssertEqual(d, expectedDateResult)
    }

    func testExtraMillisecondPrecisionDate() {
        let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56, 516)
        let d = date("2021-03-29T10:46:56.516941+00:00")
        XCTAssertEqual(d, expectedDateResult)
    }

    func testW3CParsingPerformance() {

        // 0.0001 seconds on my Mac Studio M1
        self.measure {
            _ = date("2021-03-29T10:46:56.516941+00:00")
        }
    }

    func testPubDateParsingPerformance() {

        // 0.0001 seconds on my Mac Studio M1
        self.measure {
            _ = date("21 May 2010 21:22:53 GMT")
        }
    }
}
|
||||
|
||||
private extension DateParserTests {

    /// Runs `DateParser.date(data:)` on the UTF-8 bytes of `string`.
    func date(_ string: String) -> Date? {
        let utf8Bytes = Data(string.utf8)
        let parsedDate = DateParser.date(data: utf8Bytes)
        return parsedDate
    }
}
|
@ -9,39 +9,70 @@
|
||||
import XCTest
|
||||
import Parser
|
||||
|
||||
class EntityDecodingTests: XCTestCase {
|
||||
final class EntityDecodingTests: XCTestCase {
|
||||
|
||||
func test39Decoding() {
|
||||
|
||||
// Bug found by Manton Reece — the ' entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog.
|
||||
|
||||
let s = "These are the times that try men's souls."
|
||||
let decoded = s.rsparser_stringByDecodingHTMLEntities()
|
||||
let decoded = HTMLEntityDecoder.decodedString(s)
|
||||
|
||||
XCTAssertEqual(decoded, "These are the times that try men's souls.")
|
||||
}
|
||||
|
||||
func testEntities() {
|
||||
func testEntityAtBeginning() {
|
||||
|
||||
let s = "'leading single quote"
|
||||
let decoded = HTMLEntityDecoder.decodedString(s)
|
||||
|
||||
XCTAssertEqual(decoded, "'leading single quote")
|
||||
}
|
||||
|
||||
func testEntityAtEnd() {
|
||||
|
||||
let s = "trailing single quote'"
|
||||
let decoded = HTMLEntityDecoder.decodedString(s)
|
||||
|
||||
XCTAssertEqual(decoded, "trailing single quote'")
|
||||
}
|
||||
|
||||
func testEntityInMiddle() {
|
||||
|
||||
let s = "entity ç in middle"
|
||||
let decoded = HTMLEntityDecoder.decodedString(s)
|
||||
|
||||
XCTAssertEqual(decoded, "entity ç in middle")
|
||||
}
|
||||
|
||||
func testMultipleEntitiesInARow() {
|
||||
|
||||
let s = "çèmult……iple 'æ"entities÷♥"
|
||||
let decoded = HTMLEntityDecoder.decodedString(s)
|
||||
|
||||
XCTAssertEqual(decoded, "çèmult……iple 'æ\"entities÷♥")
|
||||
}
|
||||
|
||||
func testOnlyEntity() {
|
||||
var s = "…"
|
||||
var decoded = s.rsparser_stringByDecodingHTMLEntities()
|
||||
var decoded = HTMLEntityDecoder.decodedString(s)
|
||||
|
||||
XCTAssertEqual(decoded, "…")
|
||||
|
||||
s = "…"
|
||||
decoded = s.rsparser_stringByDecodingHTMLEntities()
|
||||
decoded = HTMLEntityDecoder.decodedString(s)
|
||||
XCTAssertEqual(decoded, "…")
|
||||
|
||||
s = "'"
|
||||
decoded = s.rsparser_stringByDecodingHTMLEntities()
|
||||
decoded = HTMLEntityDecoder.decodedString(s)
|
||||
XCTAssertEqual(decoded, "'")
|
||||
|
||||
s = "§"
|
||||
decoded = s.rsparser_stringByDecodingHTMLEntities()
|
||||
decoded = HTMLEntityDecoder.decodedString(s)
|
||||
XCTAssertEqual(decoded, "§")
|
||||
|
||||
s = "£"
|
||||
decoded = s.rsparser_stringByDecodingHTMLEntities()
|
||||
decoded = HTMLEntityDecoder.decodedString(s)
|
||||
XCTAssertEqual(decoded, "£")
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -7,38 +7,37 @@
|
||||
//
|
||||
|
||||
import XCTest
|
||||
import Parser
|
||||
import ParserObjC
|
||||
@testable import Parser
|
||||
|
||||
class FeedParserTypeTests: XCTestCase {
|
||||
final class FeedParserTypeTests: XCTestCase {
|
||||
|
||||
// MARK: HTML
|
||||
|
||||
func testDaringFireballHTMLType() {
|
||||
|
||||
let d = parserData("DaringFireball", "html", "http://daringfireball.net/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .notAFeed)
|
||||
}
|
||||
|
||||
func testFurboHTMLType() {
|
||||
|
||||
let d = parserData("furbo", "html", "http://furbo.org/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .notAFeed)
|
||||
}
|
||||
|
||||
func testInessentialHTMLType() {
|
||||
|
||||
let d = parserData("inessential", "html", "http://inessential.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .notAFeed)
|
||||
}
|
||||
|
||||
func testSixColorsHTMLType() {
|
||||
|
||||
let d = parserData("sixcolors", "html", "https://sixcolors.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .notAFeed)
|
||||
}
|
||||
|
||||
@ -47,68 +46,68 @@ class FeedParserTypeTests: XCTestCase {
|
||||
func testEMarleyRSSType() {
|
||||
|
||||
let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testScriptingNewsRSSType() {
|
||||
|
||||
let d = parserData("scriptingNews", "rss", "http://scripting.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testKatieFloydRSSType() {
|
||||
|
||||
let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testMantonRSSType() {
|
||||
|
||||
let d = parserData("manton", "rss", "http://manton.org/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testDCRainmakerRSSType() {
|
||||
|
||||
let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testMacworldRSSType() {
|
||||
|
||||
let d = parserData("macworld", "rss", "https://www.macworld.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testNatashaTheRobotRSSType() {
|
||||
|
||||
let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testDontHitSaveRSSWithBOMType() {
|
||||
|
||||
let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testBioRDF() {
|
||||
let d = parserData("bio", "rdf", "http://connect.biorxiv.org/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
func testPHPXML() {
|
||||
let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rss)
|
||||
}
|
||||
|
||||
@ -118,20 +117,20 @@ class FeedParserTypeTests: XCTestCase {
|
||||
|
||||
// File extension is .rss, but it’s really an Atom feed.
|
||||
let d = parserData("DaringFireball", "rss", "http://daringfireball.net/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .atom)
|
||||
}
|
||||
|
||||
func testOneFootTsunamiAtomType() {
|
||||
|
||||
let d = parserData("OneFootTsunami", "atom", "http://onefoottsunami.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .atom)
|
||||
}
|
||||
|
||||
func testRussCoxAtomType() {
|
||||
let d = parserData("russcox", "atom", "https://research.swtch.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .atom)
|
||||
}
|
||||
|
||||
@ -140,7 +139,7 @@ class FeedParserTypeTests: XCTestCase {
|
||||
func testScriptingNewsJSONType() {
|
||||
|
||||
let d = parserData("ScriptingNews", "json", "http://scripting.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .rssInJSON)
|
||||
}
|
||||
|
||||
@ -149,35 +148,35 @@ class FeedParserTypeTests: XCTestCase {
|
||||
func testInessentialJSONFeedType() {
|
||||
|
||||
let d = parserData("inessential", "json", "http://inessential.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .jsonFeed)
|
||||
}
|
||||
|
||||
func testAllThisJSONFeedType() {
|
||||
|
||||
let d = parserData("allthis", "json", "http://leancrew.com/allthis/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .jsonFeed)
|
||||
}
|
||||
|
||||
func testCurtJSONFeedType() {
|
||||
|
||||
let d = parserData("curt", "json", "http://curtclifton.net/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .jsonFeed)
|
||||
}
|
||||
|
||||
func testPixelEnvyJSONFeedType() {
|
||||
|
||||
let d = parserData("pxlnv", "json", "http://pxlnv.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .jsonFeed)
|
||||
}
|
||||
|
||||
func testRoseJSONFeedType() {
|
||||
|
||||
let d = parserData("rose", "json", "https://www.rosemaryorchard.com/")
|
||||
let type = feedType(d)
|
||||
let type = FeedType.feedType(d.data)
|
||||
XCTAssertTrue(type == .jsonFeed)
|
||||
}
|
||||
|
||||
@ -189,7 +188,7 @@ class FeedParserTypeTests: XCTestCase {
|
||||
// The type detector should return .unknown rather than .notAFeed.
|
||||
|
||||
let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/")
|
||||
let type = feedType(d, isPartialData: true)
|
||||
let type = FeedType.feedType(d.data, isPartialData: true)
|
||||
XCTAssertEqual(type, .unknown)
|
||||
}
|
||||
|
||||
@ -201,7 +200,7 @@ class FeedParserTypeTests: XCTestCase {
|
||||
|
||||
let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
|
||||
self.measure {
|
||||
let _ = feedType(d)
|
||||
let _ = FeedType.feedType(d.data)
|
||||
}
|
||||
}
|
||||
|
||||
@ -211,7 +210,7 @@ class FeedParserTypeTests: XCTestCase {
|
||||
|
||||
let d = parserData("inessential", "json", "http://inessential.com/")
|
||||
self.measure {
|
||||
let _ = feedType(d)
|
||||
let _ = FeedType.feedType(d.data)
|
||||
}
|
||||
}
|
||||
|
||||
@ -221,7 +220,7 @@ class FeedParserTypeTests: XCTestCase {
|
||||
|
||||
let d = parserData("DaringFireball", "html", "http://daringfireball.net/")
|
||||
self.measure {
|
||||
let _ = feedType(d)
|
||||
let _ = FeedType.feedType(d.data)
|
||||
}
|
||||
}
|
||||
|
||||
@ -231,15 +230,7 @@ class FeedParserTypeTests: XCTestCase {
|
||||
|
||||
let d = parserData("DaringFireball", "rss", "http://daringfireball.net/")
|
||||
self.measure {
|
||||
let _ = feedType(d)
|
||||
let _ = FeedType.feedType(d.data)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData {
|
||||
let filename = "Resources/\(filename)"
|
||||
let path = Bundle.module.path(forResource: filename, ofType: fileExtension)!
|
||||
let data = try! Data(contentsOf: URL(fileURLWithPath: path))
|
||||
return ParserData(url: url, data: data)
|
||||
}
|
||||
|
@ -8,23 +8,22 @@
|
||||
|
||||
import XCTest
|
||||
import Parser
|
||||
import ParserObjC
|
||||
|
||||
class HTMLLinkTests: XCTestCase {
|
||||
final class HTMLLinkTests: XCTestCase {
|
||||
|
||||
func testSixColorsPerformance() {
|
||||
|
||||
// 0.003 sec on my 2012 iMac
|
||||
let d = parserData("sixcolors", "html", "http://sixcolors.com/")
|
||||
self.measure {
|
||||
let _ = RSHTMLLinkParser.htmlLinks(with: d)
|
||||
let _ = HTMLLinkParser.htmlLinks(with: d)
|
||||
}
|
||||
}
|
||||
|
||||
func testSixColorsLink() {
|
||||
|
||||
let d = parserData("sixcolors", "html", "http://sixcolors.com/")
|
||||
let links = RSHTMLLinkParser.htmlLinks(with: d)
|
||||
let links = HTMLLinkParser.htmlLinks(with: d)
|
||||
|
||||
let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php"
|
||||
let textToFind = "this week’s episode of The Incomparable"
|
||||
@ -39,5 +38,4 @@ class HTMLLinkTests: XCTestCase {
|
||||
XCTAssertTrue(found)
|
||||
XCTAssertEqual(links.count, 131)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -8,20 +8,19 @@
|
||||
|
||||
import XCTest
|
||||
import Parser
|
||||
import ParserObjC
|
||||
|
||||
class HTMLMetadataTests: XCTestCase {
|
||||
final class HTMLMetadataTests: XCTestCase {
|
||||
|
||||
func testDaringFireball() {
|
||||
|
||||
let d = parserData("DaringFireball", "html", "http://daringfireball.net/")
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let metadata = HTMLMetadataParser.metadata(with: d)
|
||||
|
||||
XCTAssertEqual(metadata.favicons.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005")
|
||||
XCTAssertEqual(metadata.favicons?.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005")
|
||||
|
||||
XCTAssertEqual(metadata.feedLinks.count, 1)
|
||||
XCTAssertEqual(metadata.feedLinks?.count, 1)
|
||||
|
||||
let feedLink = metadata.feedLinks.first!
|
||||
let feedLink: HTMLMetadataFeedLink = (metadata.feedLinks?.first!)!
|
||||
XCTAssertNil(feedLink.title)
|
||||
XCTAssertEqual(feedLink.type, "application/atom+xml")
|
||||
XCTAssertEqual(feedLink.urlString, "http://daringfireball.net/feeds/main")
|
||||
@ -32,20 +31,20 @@ class HTMLMetadataTests: XCTestCase {
|
||||
// 0.002 sec on my 2012 iMac
|
||||
let d = parserData("DaringFireball", "html", "http://daringfireball.net/")
|
||||
self.measure {
|
||||
let _ = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let _ = HTMLMetadataParser.metadata(with: d)
|
||||
}
|
||||
}
|
||||
|
||||
func testFurbo() {
|
||||
|
||||
let d = parserData("furbo", "html", "http://furbo.org/")
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let metadata = HTMLMetadataParser.metadata(with: d)
|
||||
|
||||
XCTAssertEqual(metadata.favicons.first?.urlString, "http://furbo.org/favicon.ico")
|
||||
XCTAssertEqual(metadata.favicons?.first?.urlString, "http://furbo.org/favicon.ico")
|
||||
|
||||
XCTAssertEqual(metadata.feedLinks.count, 1)
|
||||
XCTAssertEqual(metadata.feedLinks?.count, 1)
|
||||
|
||||
let feedLink = metadata.feedLinks.first!
|
||||
let feedLink = (metadata.feedLinks?.first!)!
|
||||
XCTAssertEqual(feedLink.title, "Iconfactory News Feed")
|
||||
XCTAssertEqual(feedLink.type, "application/rss+xml")
|
||||
}
|
||||
@ -55,24 +54,24 @@ class HTMLMetadataTests: XCTestCase {
|
||||
// 0.001 sec on my 2012 iMac
|
||||
let d = parserData("furbo", "html", "http://furbo.org/")
|
||||
self.measure {
|
||||
let _ = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let _ = HTMLMetadataParser.metadata(with: d)
|
||||
}
|
||||
}
|
||||
|
||||
func testInessential() {
|
||||
|
||||
let d = parserData("inessential", "html", "http://inessential.com/")
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let metadata = HTMLMetadataParser.metadata(with: d)
|
||||
|
||||
XCTAssertNil(metadata.favicons.first?.urlString)
|
||||
XCTAssertNil(metadata.favicons?.first?.urlString)
|
||||
|
||||
XCTAssertEqual(metadata.feedLinks.count, 1)
|
||||
let feedLink = metadata.feedLinks.first!
|
||||
XCTAssertEqual(metadata.feedLinks?.count, 1)
|
||||
let feedLink = (metadata.feedLinks?.first!)!
|
||||
XCTAssertEqual(feedLink.title, "RSS")
|
||||
XCTAssertEqual(feedLink.type, "application/rss+xml")
|
||||
XCTAssertEqual(feedLink.urlString, "http://inessential.com/xml/rss.xml")
|
||||
|
||||
XCTAssertEqual(metadata.appleTouchIcons.count, 0);
|
||||
XCTAssertEqual(metadata.appleTouchIcons?.count ?? 0, 0);
|
||||
}
|
||||
|
||||
func testInessentialPerformance() {
|
||||
@ -80,7 +79,7 @@ class HTMLMetadataTests: XCTestCase {
|
||||
// 0.001 sec on my 2012 iMac
|
||||
let d = parserData("inessential", "html", "http://inessential.com/")
|
||||
self.measure {
|
||||
let _ = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let _ = HTMLMetadataParser.metadata(with: d)
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,25 +88,25 @@ class HTMLMetadataTests: XCTestCase {
|
||||
// 0.004 sec on my 2012 iMac
|
||||
let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/")
|
||||
self.measure {
|
||||
let _ = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let _ = HTMLMetadataParser.metadata(with: d)
|
||||
}
|
||||
}
|
||||
|
||||
func testSixColors() {
|
||||
|
||||
let d = parserData("sixcolors", "html", "http://sixcolors.com/")
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let metadata = HTMLMetadataParser.metadata(with: d)
|
||||
|
||||
XCTAssertEqual(metadata.favicons.first?.urlString, "https://sixcolors.com/images/favicon.ico")
|
||||
XCTAssertEqual(metadata.favicons?.first?.urlString, "https://sixcolors.com/images/favicon.ico")
|
||||
|
||||
XCTAssertEqual(metadata.feedLinks.count, 1);
|
||||
let feedLink = metadata.feedLinks.first!
|
||||
XCTAssertEqual(metadata.feedLinks?.count, 1);
|
||||
let feedLink = (metadata.feedLinks?.first!)!
|
||||
XCTAssertEqual(feedLink.title, "RSS");
|
||||
XCTAssertEqual(feedLink.type, "application/rss+xml");
|
||||
XCTAssertEqual(feedLink.urlString, "http://feedpress.me/sixcolors");
|
||||
|
||||
XCTAssertEqual(metadata.appleTouchIcons.count, 6);
|
||||
let icon = metadata.appleTouchIcons[3];
|
||||
XCTAssertEqual(metadata.appleTouchIcons!.count, 6);
|
||||
let icon = metadata.appleTouchIcons![3];
|
||||
XCTAssertEqual(icon.rel, "apple-touch-icon");
|
||||
XCTAssertEqual(icon.sizes, "120x120");
|
||||
XCTAssertEqual(icon.urlString, "https://sixcolors.com/apple-touch-icon-120.png");
|
||||
@ -118,24 +117,24 @@ class HTMLMetadataTests: XCTestCase {
|
||||
// 0.002 sec on my 2012 iMac
|
||||
let d = parserData("sixcolors", "html", "http://sixcolors.com/")
|
||||
self.measure {
|
||||
let _ = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let _ = HTMLMetadataParser.metadata(with: d)
|
||||
}
|
||||
}
|
||||
|
||||
func testCocoOGImage() {
|
||||
|
||||
let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/")
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let openGraphData = metadata.openGraphProperties
|
||||
let image = openGraphData.images.first!
|
||||
let metadata = HTMLMetadataParser.metadata(with: d)
|
||||
let openGraphData = metadata.openGraphProperties!
|
||||
let image = openGraphData.image!
|
||||
XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177")
|
||||
}
|
||||
|
||||
func testCocoTwitterImage() {
|
||||
|
||||
let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/")
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let twitterData = metadata.twitterProperties
|
||||
let metadata = HTMLMetadataParser.metadata(with: d)
|
||||
let twitterData = metadata.twitterProperties!
|
||||
let imageURL = twitterData.imageURL!
|
||||
XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177")
|
||||
}
|
||||
@ -143,10 +142,10 @@ class HTMLMetadataTests: XCTestCase {
|
||||
func testYouTube() {
|
||||
// YouTube is a special case — the feed links appear after the head section, in the body section.
|
||||
let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks")
|
||||
let metadata = RSHTMLMetadataParser.htmlMetadata(with: d)
|
||||
let metadata = HTMLMetadataParser.metadata(with: d)
|
||||
|
||||
XCTAssertEqual(metadata.feedLinks.count, 1);
|
||||
let feedLink = metadata.feedLinks.first!
|
||||
XCTAssertEqual(metadata.feedLinks!.count, 1);
|
||||
let feedLink = metadata.feedLinks!.first!
|
||||
XCTAssertEqual(feedLink.title, "RSS");
|
||||
XCTAssertEqual(feedLink.type, "application/rss+xml");
|
||||
XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ");
|
||||
|
@ -9,14 +9,14 @@
|
||||
import XCTest
|
||||
import Parser
|
||||
|
||||
class JSONFeedParserTests: XCTestCase {
|
||||
final class JSONFeedParserTests: XCTestCase {
|
||||
|
||||
func testInessentialPerformance() {
|
||||
|
||||
// 0.001 sec on my 2012 iMac.
|
||||
let d = parserData("inessential", "json", "http://inessential.com/")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,14 +25,14 @@ class JSONFeedParserTests: XCTestCase {
|
||||
// 0.009 sec on my 2012 iMac.
|
||||
let d = parserData("DaringFireball", "json", "http://daringfireball.net/")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
func testGettingFaviconAndIconURLs() async {
|
||||
|
||||
let d = parserData("DaringFireball", "json", "http://daringfireball.net/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssert(parsedFeed.faviconURL == "https://daringfireball.net/graphics/favicon-64.png")
|
||||
XCTAssert(parsedFeed.iconURL == "https://daringfireball.net/graphics/apple-touch-icon.png")
|
||||
@ -41,7 +41,7 @@ class JSONFeedParserTests: XCTestCase {
|
||||
func testAllThis() async {
|
||||
|
||||
let d = parserData("allthis", "json", "http://leancrew.com/allthis/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertEqual(parsedFeed.items.count, 12)
|
||||
}
|
||||
@ -49,7 +49,7 @@ class JSONFeedParserTests: XCTestCase {
|
||||
func testCurt() async {
|
||||
|
||||
let d = parserData("curt", "json", "http://curtclifton.net/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertEqual(parsedFeed.items.count, 26)
|
||||
|
||||
@ -67,20 +67,20 @@ class JSONFeedParserTests: XCTestCase {
|
||||
func testPixelEnvy() async {
|
||||
|
||||
let d = parserData("pxlnv", "json", "http://pxlnv.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssertEqual(parsedFeed.items.count, 20)
|
||||
|
||||
}
|
||||
|
||||
func testRose() async {
|
||||
let d = parserData("rose", "json", "http://www.rosemaryorchard.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssertEqual(parsedFeed.items.count, 84)
|
||||
}
|
||||
|
||||
func test3960() async {
|
||||
let d = parserData("3960", "json", "http://journal.3960.org/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssertEqual(parsedFeed.items.count, 20)
|
||||
XCTAssertEqual(parsedFeed.language, "de-DE")
|
||||
|
||||
@ -91,7 +91,7 @@ class JSONFeedParserTests: XCTestCase {
|
||||
|
||||
func testAuthors() async {
|
||||
let d = parserData("authors", "json", "https://example.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssertEqual(parsedFeed.items.count, 4)
|
||||
|
||||
let rootAuthors = Set([
|
||||
|
@ -7,32 +7,33 @@
|
||||
//
|
||||
|
||||
import XCTest
|
||||
import Parser
|
||||
import ParserObjC
|
||||
@testable import Parser
|
||||
|
||||
class OPMLTests: XCTestCase {
|
||||
final class OPMLTests: XCTestCase {
|
||||
|
||||
let subsData = parserData("Subs", "opml", "http://example.org/")
|
||||
|
||||
func testOPMLParsingPerformance() {
|
||||
|
||||
// 0.002 sec on my 2012 iMac.
|
||||
// 0.003 sec on my M1 Mac Studio 2022
|
||||
self.measure {
|
||||
let _ = try! RSOPMLParser.parseOPML(with: self.subsData)
|
||||
let _ = OPMLParser.document(with: self.subsData)
|
||||
}
|
||||
}
|
||||
|
||||
func testNotOPML() {
|
||||
|
||||
let d = parserData("DaringFireball", "rss", "http://daringfireball.net/")
|
||||
XCTAssertThrowsError(try RSOPMLParser.parseOPML(with: d))
|
||||
XCTAssertNil(OPMLParser.document(with: d))
|
||||
}
|
||||
|
||||
func testSubsStructure() {
|
||||
let opmlDocument = try! RSOPMLParser.parseOPML(with: subsData)
|
||||
XCTAssertEqual("Subs", opmlDocument.title)
|
||||
XCTAssertEqual("http://example.org/", opmlDocument.url)
|
||||
recursivelyCheckOPMLStructure(opmlDocument)
|
||||
let opmlDocument = OPMLParser.document(with: subsData)
|
||||
XCTAssertNotNil(opmlDocument)
|
||||
|
||||
XCTAssertEqual("Subs", opmlDocument!.title)
|
||||
XCTAssertEqual("http://example.org/", opmlDocument!.url)
|
||||
recursivelyCheckOPMLStructure(opmlDocument!)
|
||||
}
|
||||
|
||||
|
||||
@ -42,23 +43,23 @@ class OPMLTests: XCTestCase {
|
||||
// which appears to be true with OPML generated by The Old Reader.
|
||||
|
||||
let d = parserData("SubsNoTitleAttributes", "opml", "http://example.org/")
|
||||
let opmlDocument = try! RSOPMLParser.parseOPML(with: d)
|
||||
recursivelyCheckOPMLStructure(opmlDocument)
|
||||
let opmlDocument = OPMLParser.document(with: d)
|
||||
recursivelyCheckOPMLStructure(opmlDocument!)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private extension OPMLTests {
|
||||
|
||||
func recursivelyCheckOPMLStructure(_ item: RSOPMLItem) {
|
||||
func recursivelyCheckOPMLStructure(_ item: OPMLItem) {
|
||||
let feedSpecifier = item.feedSpecifier
|
||||
if !(item is RSOPMLDocument) {
|
||||
XCTAssertNotNil((item.attributes! as NSDictionary).opml_text)
|
||||
if !(item is OPMLDocument) {
|
||||
XCTAssertNotNil(item.attributes!.opml_text)
|
||||
}
|
||||
|
||||
// If it has no children, it should have a feed specifier. The converse is also true.
|
||||
var isFolder = item.children != nil && item.children!.count > 0
|
||||
if !isFolder && (item.attributes! as NSDictionary).opml_title == "Skip" {
|
||||
var isFolder = item.items != nil && item.items!.count > 0
|
||||
if !isFolder && item.attributes?.opml_title == "Skip" {
|
||||
isFolder = true
|
||||
}
|
||||
|
||||
@ -70,10 +71,17 @@ private extension OPMLTests {
|
||||
XCTAssertNil(feedSpecifier)
|
||||
}
|
||||
|
||||
if item.children != nil && item.children!.count > 0 {
|
||||
for oneItem in item.children! {
|
||||
if item.items != nil && item.items!.count > 0 {
|
||||
for oneItem in item.items! {
|
||||
recursivelyCheckOPMLStructure(oneItem)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData {
|
||||
let filename = "Resources/\(filename)"
|
||||
let path = Bundle.module.path(forResource: filename, ofType: fileExtension)!
|
||||
let data = try! Data(contentsOf: URL(fileURLWithPath: path))
|
||||
return ParserData(url: url, data: data)
|
||||
}
|
||||
|
@ -1,12 +0,0 @@
|
||||
import XCTest
|
||||
@testable import Parser
|
||||
|
||||
final class ParserTests: XCTestCase {
|
||||
func testExample() throws {
|
||||
// XCTest Documentation
|
||||
// https://developer.apple.com/documentation/xctest
|
||||
|
||||
// Defining Test Cases and Test Methods
|
||||
// https://developer.apple.com/documentation/xctest/defining_test_cases_and_test_methods
|
||||
}
|
||||
}
|
@ -1,109 +0,0 @@
|
||||
//
|
||||
// RSDateParserTests.swift
|
||||
//
|
||||
//
|
||||
// Created by Maurice Parker on 4/1/21.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import XCTest
|
||||
import Parser
|
||||
|
||||
class RSDateParserTests: XCTestCase {
|
||||
|
||||
static func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int = 0) -> Date {
|
||||
var dateComponents = DateComponents()
|
||||
dateComponents.calendar = Calendar.current
|
||||
dateComponents.timeZone = TimeZone(secondsFromGMT: 0)
|
||||
|
||||
dateComponents.year = year
|
||||
dateComponents.month = month
|
||||
dateComponents.day = day
|
||||
dateComponents.hour = hour
|
||||
dateComponents.minute = minute
|
||||
dateComponents.second = second
|
||||
dateComponents.nanosecond = milliseconds * 1000000
|
||||
|
||||
return dateComponents.date!
|
||||
}
|
||||
|
||||
func testDateWithString() {
|
||||
var expectedDateResult = Self.dateWithValues(2010, 5, 28, 21, 3, 38)
|
||||
|
||||
var d = RSDateWithString("Fri, 28 May 2010 21:03:38 +0000")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("Fri, 28 May 2010 21:03:38 +00:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("Fri, 28 May 2010 21:03:38 -00:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("Fri, 28 May 2010 21:03:38 -0000")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("Fri, 28 May 2010 21:03:38 GMT")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("2010-05-28T21:03:38+00:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("2010-05-28T21:03:38+0000")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("2010-05-28T21:03:38-0000")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("2010-05-28T21:03:38-00:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
d = RSDateWithString("2010-05-28T21:03:38Z")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
expectedDateResult = Self.dateWithValues(2010, 7, 13, 17, 6, 40)
|
||||
d = RSDateWithString("2010-07-13T17:06:40+00:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
expectedDateResult = Self.dateWithValues(2010, 4, 30, 12, 0, 0)
|
||||
d = RSDateWithString("30 Apr 2010 5:00 PDT")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
expectedDateResult = Self.dateWithValues(2010, 5, 21, 21, 22, 53)
|
||||
d = RSDateWithString("21 May 2010 21:22:53 GMT")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
expectedDateResult = Self.dateWithValues(2010, 6, 9, 5, 0, 0)
|
||||
d = RSDateWithString("Wed, 09 Jun 2010 00:00 EST")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
expectedDateResult = Self.dateWithValues(2010, 6, 23, 3, 43, 50)
|
||||
d = RSDateWithString("Wed, 23 Jun 2010 03:43:50 Z")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
expectedDateResult = Self.dateWithValues(2010, 6, 22, 3, 57, 49)
|
||||
d = RSDateWithString("2010-06-22T03:57:49+00:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
|
||||
expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07)
|
||||
d = RSDateWithString("2010-11-17T08:40:07-05:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
}
|
||||
|
||||
func testAtomDateWithMissingTCharacter() {
|
||||
let expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07)
|
||||
let d = RSDateWithString("2010-11-17 08:40:07-05:00")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
}
|
||||
|
||||
func testFeedbinDate() {
|
||||
let expectedDateResult = Self.dateWithValues(2019, 9, 27, 21, 01, 48)
|
||||
let d = RSDateWithString("2019-09-27T21:01:48.000000Z")
|
||||
XCTAssertEqual(d, expectedDateResult)
|
||||
}
|
||||
|
||||
func testHighMillisecondDate() {
|
||||
let expectedDateResult = Self.dateWithValues(2021, 03, 29, 10, 46, 56, 516)
|
||||
let d = RSDateWithString("2021-03-29T10:46:56.516941+00:00")
|
||||
XCTAssertEqual(d!.timeIntervalSince1970, expectedDateResult.timeIntervalSince1970, accuracy: 0.000001)
|
||||
}
|
||||
}
|
@ -9,20 +9,28 @@
|
||||
import XCTest
|
||||
import Parser
|
||||
|
||||
class RSSInJSONParserTests: XCTestCase {
|
||||
final class RSSInJSONParserTests: XCTestCase {
|
||||
|
||||
func testScriptingNewsPerformance() {
|
||||
|
||||
// 0.003 sec on my 2012 iMac.
|
||||
let d = parserData("ScriptingNews", "json", "http://scripting.com/")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
func testFeedLanguage() {
|
||||
let d = parserData("ScriptingNews", "json", "http://scripting.com/")
|
||||
let parsedFeed = try! FeedParser.parseSync(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssertEqual(parsedFeed.language, "en-us")
|
||||
}
|
||||
}
|
||||
|
||||
extension FeedParser {
|
||||
|
||||
static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
|
||||
|
||||
try FeedParser.parse(urlString: parserData.url, data: parserData.data)
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
import XCTest
|
||||
import Parser
|
||||
|
||||
class RSSParserTests: XCTestCase {
|
||||
final class RSSParserTests: XCTestCase {
|
||||
|
||||
func testScriptingNewsPerformance() {
|
||||
|
||||
@ -17,7 +17,7 @@ class RSSParserTests: XCTestCase {
|
||||
// 0.002 2022 Mac Studio
|
||||
let d = parserData("scriptingNews", "rss", "http://scripting.com/")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
@ -27,7 +27,7 @@ class RSSParserTests: XCTestCase {
|
||||
// 0.001 2022 Mac Studio
|
||||
let d = parserData("KatieFloyd", "rss", "http://katiefloyd.com/")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
@ -37,7 +37,7 @@ class RSSParserTests: XCTestCase {
|
||||
// 0.0004 2022 Mac Studio
|
||||
let d = parserData("EMarley", "rss", "https://medium.com/@emarley")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
@ -47,21 +47,23 @@ class RSSParserTests: XCTestCase {
|
||||
// 0.0006 2022 Mac Studio
|
||||
let d = parserData("manton", "rss", "http://manton.org/")
|
||||
self.measure {
|
||||
let _ = try! FeedParser.parseSync(d)
|
||||
let _ = try! FeedParser.parse(d)
|
||||
}
|
||||
}
|
||||
|
||||
func testNatashaTheRobot() async {
|
||||
|
||||
let d = parserData("natasha", "xml", "https://www.natashatherobot.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssertEqual(parsedFeed.items.count, 10)
|
||||
}
|
||||
|
||||
func testTheOmniShowAttachments() async {
|
||||
|
||||
let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNotNil(article.attachments)
|
||||
@ -78,7 +80,9 @@ class RSSParserTests: XCTestCase {
|
||||
func testTheOmniShowUniqueIDs() async {
|
||||
|
||||
let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNotNil(article.uniqueID)
|
||||
@ -91,7 +95,7 @@ class RSSParserTests: XCTestCase {
|
||||
// Macworld’s feed doesn’t have guids, so they should be calculated unique IDs.
|
||||
|
||||
let d = parserData("macworld", "rss", "https://www.macworld.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNotNil(article.uniqueID)
|
||||
@ -104,7 +108,9 @@ class RSSParserTests: XCTestCase {
|
||||
// Macworld uses names instead of email addresses (despite the RSS spec saying they should be email addresses).
|
||||
|
||||
let d = parserData("macworld", "rss", "https://www.macworld.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
|
||||
@ -123,7 +129,9 @@ class RSSParserTests: XCTestCase {
|
||||
// detect this situation, and every article in the feed should have a permalink.
|
||||
|
||||
let d = parserData("monkeydom", "rss", "https://coding.monkeydom.de/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNil(article.url)
|
||||
@ -136,7 +144,9 @@ class RSSParserTests: XCTestCase {
|
||||
// https://github.com/brentsimmons/NetNewsWire/issues/529
|
||||
|
||||
let d = parserData("atp", "rss", "http://atp.fm/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNotNil(article.contentHTML)
|
||||
@ -145,7 +155,10 @@ class RSSParserTests: XCTestCase {
|
||||
|
||||
func testFeedKnownToHaveGuidsThatArentPermalinks() async {
|
||||
let d = parserData("livemint", "xml", "https://www.livemint.com/rss/news")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNil(article.url)
|
||||
}
|
||||
@ -155,7 +168,10 @@ class RSSParserTests: XCTestCase {
|
||||
// This feed uses atom authors, and we don’t want author/title to be used as item/title.
|
||||
// https://github.com/brentsimmons/NetNewsWire/issues/943
|
||||
let d = parserData("cloudblog", "rss", "https://cloudblog.withgoogle.com/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNotEqual(article.title, "Product Manager, Office of the CTO")
|
||||
XCTAssertNotEqual(article.title, "Developer Programs Engineer")
|
||||
@ -167,26 +183,29 @@ class RSSParserTests: XCTestCase {
|
||||
// This invalid feed has <image> elements inside <item>s.
|
||||
// 17 Jan 2021 bug report — we’re not parsing titles in this feed.
|
||||
let d = parserData("aktuality", "rss", "https://www.aktuality.sk/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
for article in parsedFeed.items {
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
|
||||
XCTAssertTrue(parsedFeed.items.count > 0)
|
||||
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNotNil(article.title)
|
||||
}
|
||||
}
|
||||
|
||||
func testFeedLanguage() async {
|
||||
let d = parserData("manton", "rss", "http://manton.org/")
|
||||
let parsedFeed = try! await FeedParser.parse(d)!
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssertEqual(parsedFeed.language, "en-US")
|
||||
}
|
||||
|
||||
// func testFeedWithGB2312Encoding() {
|
||||
// // This feed has an encoding we don’t run into very often.
|
||||
// // https://github.com/Ranchero-Software/NetNewsWire/issues/1477
|
||||
// let d = parserData("kc0011", "rss", "http://kc0011.net/")
|
||||
// let parsedFeed = try! FeedParser.parse(d)!
|
||||
// XCTAssert(parsedFeed.items.count > 0)
|
||||
// for article in parsedFeed.items {
|
||||
// XCTAssertNotNil(article.contentHTML)
|
||||
// }
|
||||
// }
|
||||
func testFeedWithGB2312Encoding() {
|
||||
// This feed has an encoding we don’t run into very often.
|
||||
// https://github.com/Ranchero-Software/NetNewsWire/issues/1477
|
||||
let d = parserData("kc0011", "rss", "http://kc0011.net/")
|
||||
let parsedFeed = try! FeedParser.parse(d)!
|
||||
XCTAssert(parsedFeed.items.count > 0)
|
||||
for article in parsedFeed.items {
|
||||
XCTAssertNotNil(article.contentHTML)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
8
Modules/ParserObjC/.gitignore
vendored
8
Modules/ParserObjC/.gitignore
vendored
@ -1,8 +0,0 @@
|
||||
.DS_Store
|
||||
/.build
|
||||
/Packages
|
||||
xcuserdata/
|
||||
DerivedData/
|
||||
.swiftpm/configuration/registries.json
|
||||
.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
|
||||
.netrc
|
@ -1,26 +0,0 @@
|
||||
// swift-tools-version: 5.10
|
||||
// The swift-tools-version declares the minimum version of Swift required to build this package.
|
||||
|
||||
import PackageDescription
|
||||
|
||||
let package = Package(
|
||||
name: "ParserObjC",
|
||||
platforms: [.macOS(.v14), .iOS(.v17)],
|
||||
products: [
|
||||
// Products define the executables and libraries a package produces, making them visible to other packages.
|
||||
.library(
|
||||
name: "ParserObjC",
|
||||
type: .dynamic,
|
||||
targets: ["ParserObjC"]),
|
||||
],
|
||||
targets: [
|
||||
// Targets are the basic building blocks of a package, defining a module or a test suite.
|
||||
// Targets can depend on other targets in this package and products from dependencies.
|
||||
.target(
|
||||
name: "ParserObjC",
|
||||
cSettings: [
|
||||
.headerSearchPath("include")
|
||||
]
|
||||
),
|
||||
]
|
||||
)
|
@ -1,24 +0,0 @@
|
||||
//
|
||||
// FeedParser.h
|
||||
// RSXML
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
@class RSParsedFeed;
|
||||
@class RSXMLData;
|
||||
|
||||
|
||||
@protocol FeedParser <NSObject>
|
||||
|
||||
+ (BOOL)canParseFeed:(RSXMLData * _Nonnull)xmlData;
|
||||
|
||||
- (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData;
|
||||
|
||||
- (nullable RSParsedFeed *)parseFeed:(NSError * _Nullable * _Nullable)error;
|
||||
|
||||
|
||||
@end
|
@ -1,26 +0,0 @@
|
||||
//
|
||||
// NSData+RSParser.h
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/24/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
|
||||
@interface NSData (RSParser)
|
||||
|
||||
- (BOOL)isProbablyHTML;
|
||||
- (BOOL)isProbablyXML;
|
||||
- (BOOL)isProbablyJSON;
|
||||
|
||||
- (BOOL)isProbablyJSONFeed;
|
||||
- (BOOL)isProbablyRSSInJSON;
|
||||
- (BOOL)isProbablyRSS;
|
||||
- (BOOL)isProbablyAtom;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
|
@ -1,139 +0,0 @@
|
||||
//
|
||||
// NSData+RSParser.m
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 6/24/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
#import "NSData+RSParser.h"
|
||||
|
||||
|
||||
|
||||
|
||||
/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/
|
||||
|
||||
// Forward declarations for the file-private C helpers defined below the category implementation.

// Low-level byte searches.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes);

// Format-level checks.
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes);

// Feed-type prefixes (declared here; no definition is visible in this part of the file).
static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes);
|
||||
|
||||
@implementation NSData (RSParser)

/// YES when the bytes contain one of the HTML markers checked by bytesAreProbablyHTML().
- (BOOL)isProbablyHTML {

	const char *rawBytes = self.bytes;
	const NSUInteger byteCount = self.length;
	return bytesAreProbablyHTML(rawBytes, byteCount);
}

/// YES when the bytes start with an XML declaration, as decided by bytesAreProbablyXML().
- (BOOL)isProbablyXML {

	const char *rawBytes = self.bytes;
	const NSUInteger byteCount = self.length;
	return bytesAreProbablyXML(rawBytes, byteCount);
}

/// YES when the first significant byte is an opening brace.
- (BOOL)isProbablyJSON {

	const char *rawBytes = self.bytes;
	const NSUInteger byteCount = self.length;
	return bytesStartWithStringIgnoringWhitespace("{", rawBytes, byteCount);
}

/// YES when the data looks like JSON and mentions the jsonfeed.org version URL,
/// either plain or with its slashes backslash-escaped.
- (BOOL)isProbablyJSONFeed {

	if (![self isProbablyJSON]) {
		return NO;
	}

	const char *rawBytes = self.bytes;
	const NSUInteger byteCount = self.length;

	if (didFindString("://jsonfeed.org/version/", rawBytes, byteCount)) {
		return YES;
	}
	return didFindString(":\\/\\/jsonfeed.org\\/version\\/", rawBytes, byteCount);
}

/// YES when the data looks like JSON and contains "rss", "channel" and "item".
- (BOOL)isProbablyRSSInJSON {

	if (![self isProbablyJSON]) {
		return NO;
	}

	const char *rawBytes = self.bytes;
	const NSUInteger byteCount = self.length;

	if (!didFindString("rss", rawBytes, byteCount)) {
		return NO;
	}
	if (!didFindString("channel", rawBytes, byteCount)) {
		return NO;
	}
	return didFindString("item", rawBytes, byteCount);
}

/// YES when the data contains an RSS or RDF root tag, or the fallback RSS markers.
- (BOOL)isProbablyRSS {

	const char *rawBytes = self.bytes;
	const NSUInteger byteCount = self.length;

	const BOOL hasRootTag = didFindString("<rss", rawBytes, byteCount) || didFindString("<rdf:RDF", rawBytes, byteCount);
	if (hasRootTag) {
		return YES;
	}

	// At this writing (7 Dec. 2017), https://www.natashatherobot.com/feed/ is missing an opening <rss> tag,
	// but it should be parsed anyway. It does have some other distinct RSS markers we can find.
	return didFindString("<channel>", rawBytes, byteCount) && didFindString("<pubDate>", rawBytes, byteCount);
}

/// YES when the data contains "<feed".
- (BOOL)isProbablyAtom {

	const char *rawBytes = self.bytes;
	const NSUInteger byteCount = self.length;
	return didFindString("<feed", rawBytes, byteCount);
}

@end
|
||||
|
||||
|
||||
/// Returns YES when `string` occurs within the first `numberOfBytes` bytes of `bytes`.
/// The search is done with strnstr(), so it is bounded by `numberOfBytes`.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {

	return strnstr(bytes, string, numberOfBytes) != NULL;
}
|
||||
|
||||
/// Returns YES when `bytes` begins with `string`, ignoring leading whitespace
/// (space, CR, LF, tab) and up to four leading non-matching bytes (room for a BOM).
///
/// @param string        NUL-terminated prefix to look for.
/// @param bytes         Buffer to inspect; it does not need to be NUL-terminated.
/// @param numberOfBytes Number of valid bytes in `bytes`.
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes) {

	const size_t stringLength = strlen(string);

	for (NSUInteger i = 0; i < numberOfBytes; i++) {

		const char ch = bytes[i];
		if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') {
			continue;
		}

		if (ch == string[0]) {
			// Compare in place. The previous strnstr() call searched from the start of the
			// buffer: on a mismatch it scanned all remaining bytes for nothing, and a NUL
			// byte among the skipped lead-in bytes ended the search before it reached i.
			const NSUInteger remaining = numberOfBytes - i;
			return stringLength <= remaining && memcmp(bytes + i, string, stringLength) == 0;
		}

		// Allow for a BOM of up to four bytes. ASSUMPTION: BOM will only be at the start of the data.
		if (i >= 4) {
			break;
		}
	}

	return NO;
}
|
||||
|
||||
/// Returns YES when the bytes contain an HTML tag marker, or contain "<" together
/// with one of the recognized spellings of an HTML doctype.
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {

	static const char * const tagMarkers[] = {"<html", "<HTML", "<body", "<meta"};
	static const char * const doctypeMarkers[] = {"doctype html", "DOCTYPE html", "DOCTYPE HTML"};

	const size_t tagMarkerCount = sizeof(tagMarkers) / sizeof(tagMarkers[0]);
	for (size_t index = 0; index < tagMarkerCount; index++) {
		if (didFindString(tagMarkers[index], bytes, numberOfBytes)) {
			return YES;
		}
	}

	// The doctype spellings only count when there is at least one "<" in the data.
	if (!didFindString("<", bytes, numberOfBytes)) {
		return NO;
	}

	const size_t doctypeMarkerCount = sizeof(doctypeMarkers) / sizeof(doctypeMarkers[0]);
	for (size_t index = 0; index < doctypeMarkerCount; index++) {
		if (didFindString(doctypeMarkers[index], bytes, numberOfBytes)) {
			return YES;
		}
	}

	return NO;
}
|
||||
|
||||
/// Returns YES when the bytes start (per bytesStartWithStringIgnoringWhitespace) with "<?xml".
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes) {

	static const char * const xmlDeclarationStart = "<?xml";
	return bytesStartWithStringIgnoringWhitespace(xmlDeclarationStart, bytes, numberOfBytes);
}
|
@ -1,26 +0,0 @@
|
||||
//
|
||||
// NSString+RSParser.h
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 9/25/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// String helpers: entity decoding and encoding, an MD5 hex digest, and a substring test.
@interface NSString (RSParser)

/// Returns a copy of \c self with <, >, and & entity-encoded.
@property (readonly, copy) NSString *rsparser_stringByEncodingRequiredEntities;

/// Returns the receiver with named and numeric entities decoded. An "&" that does not
/// start a decodable entity is kept as-is.
- (NSString *)rsparser_stringByDecodingHTMLEntities;

/// Returns the MD5 digest of the receiver's UTF-8 bytes as 32 lowercase hexadecimal digits.
- (NSString *)rsparser_md5Hash;

/// Returns YES when \c s occurs anywhere in the receiver.
- (BOOL)rsparser_contains:(NSString *)s;

@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
@ -1,348 +0,0 @@
|
||||
//
|
||||
// NSString+RSParser.m
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 9/25/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
#import "NSString+RSParser.h"
|
||||
#import <CommonCrypto/CommonDigest.h>
|
||||
|
||||
|
||||
|
||||
|
||||
/// File-private scanner helper used while decoding entities.
@interface NSScanner (RSParser)

/// Scans from the current location (expected to be at an "&") up to a terminating ";".
/// Returns YES and stores the decoded text in \c decodedEntity when the entity is recognized.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;

@end
|
||||
|
||||
|
||||
@implementation NSString (RSParser)

/// Returns YES when `s` occurs anywhere in the receiver.
- (BOOL)rsparser_contains:(NSString *)s {

	return [self rangeOfString:s].location != NSNotFound;
}

/// Returns the receiver with entities decoded.
///
/// Text is copied through up to each "&"; the scanner category then tries to read an
/// entity there. If that fails, a literal "&" is emitted and scanning resumes at the
/// following character. Returns `self` when decoding changed nothing.
- (NSString *)rsparser_stringByDecodingHTMLEntities {

	@autoreleasepool {

		NSScanner *scanner = [[NSScanner alloc] initWithString:self];
		scanner.charactersToBeSkipped = nil; // Whitespace is significant here.
		NSMutableString *result = [[NSMutableString alloc] init];

		while (true) {

			NSString *scannedString = nil;
			if ([scanner scanUpToString:@"&" intoString:&scannedString]) {
				[result appendString:scannedString];
			}
			if (scanner.isAtEnd) {
				break;
			}
			NSUInteger savedScanLocation = scanner.scanLocation;

			NSString *decodedEntity = nil;
			if ([scanner rs_scanEntityValue:&decodedEntity]) {
				[result appendString:decodedEntity];
			}
			else {
				// Not a decodable entity: keep the ampersand and continue right after it.
				[result appendString:@"&"];
				scanner.scanLocation = savedScanLocation + 1;
			}

			if (scanner.isAtEnd) {
				break;
			}
		}

		if ([self isEqualToString:result]) {
			return self;
		}
		return [result copy];
	}
}


static NSDictionary *RSEntitiesDictionary(void);
static NSString *RSParserStringWithValue(uint32_t value);

/// Decodes a single entity: a named entity, "#x…" hexadecimal, or "#…" decimal.
/// Returns nil when the entity is not recognized.
- (NSString * _Nullable)rs_stringByDecodingEntity {

	// self may or may not have outer & and ; characters.

	NSMutableString *s = [self mutableCopy];

	if ([s hasPrefix:@"&"]) {
		[s deleteCharactersInRange:NSMakeRange(0, 1)];
	}
	if ([s hasSuffix:@";"]) {
		[s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)];
	}

	NSDictionary *entitiesDictionary = RSEntitiesDictionary();

	// Look up the stripped name. The table keys have no "&" or ";", so looking up
	// `self` would miss whenever the caller passed the delimiters along.
	NSString *decodedEntity = entitiesDictionary[s];
	if (decodedEntity) {
		return decodedEntity;
	}

	if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex
		NSScanner *scanner = [[NSScanner alloc] initWithString:s];
		scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"];
		unsigned int hexValue = 0;
		if ([scanner scanHexInt:&hexValue]) {
			return RSParserStringWithValue((uint32_t)hexValue);
		}
		return nil;
	}

	else if ([s hasPrefix:@"#"]) { // Decimal
		[s deleteCharactersInRange:NSMakeRange(0, 1)];
		NSInteger value = s.integerValue;
		// Reject zero, negatives, and anything above the last Unicode code point;
		// the latter would otherwise be silently truncated by the uint32_t cast.
		if (value < 1 || value > 0x10FFFF) {
			return nil;
		}
		return RSParserStringWithValue((uint32_t)value);
	}

	return nil;
}

/// Returns a copy of the receiver with "<", ">" and "&" replaced by "&lt;", "&gt;" and "&amp;".
- (NSString *)rsparser_stringByEncodingRequiredEntities {
	NSMutableString *result = [NSMutableString string];

	for (NSUInteger i = 0; i < self.length; ++i) {
		unichar c = [self characterAtIndex:i];

		switch (c) {
			case '<':
				[result appendString:@"&lt;"];
				break;
			case '>':
				[result appendString:@"&gt;"];
				break;
			case '&':
				[result appendString:@"&amp;"];
				break;
			default:
				[result appendFormat:@"%C", c];
				break;
		}
	}

	return [result copy];
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/// Raw MD5 digest of the receiver's UTF-8 bytes. The pragma silences the CC_MD5 deprecation warning.
- (NSData *)_rsparser_md5HashData {

	NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding];
	unsigned char hash[CC_MD5_DIGEST_LENGTH];
	CC_MD5(data.bytes, (CC_LONG)data.length, hash);

	return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH];
}
#pragma GCC diagnostic pop

/// MD5 digest of the receiver's UTF-8 bytes, formatted as 32 lowercase hexadecimal digits.
- (NSString *)rsparser_md5Hash {

	NSData *md5Data = [self _rsparser_md5HashData];
	const Byte *bytes = md5Data.bytes;
	return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]];
}


@end
|
||||
|
||||
@implementation NSScanner (RSParser)

/// Walks forward from the current scan location looking for the ";" that ends an entity.
///
/// Gives up (returns NO) at whitespace, once more than `maxEntityLength` characters have
/// been consumed, or at the end of the string. When ";" is found, the text between the
/// starting character and the ";" is decoded; the result is stored in `decodedEntity`
/// and the scan location moves past the ";". If `decodedEntity` is NULL, finding the
/// ";" is enough to return YES.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {

	static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity.

	NSString *source = self.string;
	const NSUInteger startLocation = self.scanLocation;
	NSCharacterSet *whitespace = NSCharacterSet.whitespaceAndNewlineCharacterSet;

	do {
		const NSUInteger currentLocation = self.scanLocation;
		const unichar ch = [source characterAtIndex:currentLocation];

		if ([whitespace characterIsMember:ch]) {
			return NO;
		}

		if (ch == ';') {
			if (decodedEntity == NULL) {
				return YES;
			}
			// Skip the first character (the "&") and stop before the ";".
			const NSRange entityRange = NSMakeRange(startLocation + 1, (currentLocation - startLocation) - 1);
			NSString *rawEntity = [source substringWithRange:entityRange];
			*decodedEntity = [rawEntity rs_stringByDecodingEntity];
			self.scanLocation = currentLocation + 1;
			return *decodedEntity != nil;
		}

		self.scanLocation = currentLocation + 1;

	} while (self.scanLocation - startLocation <= maxEntityLength && !self.isAtEnd);

	return NO;
}

@end
|
||||
|
||||
/// Builds a one-code-point string from a numeric value.
///
/// Values 0x80–0x9F are first remapped through the Windows Latin-1 extension table.
/// The value is then interpreted as little-endian UTF-32.
static NSString *RSParserStringWithValue(uint32_t value) {

	// From WebCore's HTMLEntityParser
	static const uint32_t windowsLatin1ExtensionArray[32] = {
		0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
		0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
		0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
		0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
	};

	uint32_t codePoint = value;
	const BOOL isInExtensionRange = (codePoint >= 0x80u && codePoint <= 0x9Fu);
	if (isInExtensionRange) {
		codePoint = windowsLatin1ExtensionArray[codePoint - 0x80u];
	}

	const uint32_t littleEndianCodePoint = CFSwapInt32HostToLittle(codePoint);
	return [[NSString alloc] initWithBytes:&littleEndianCodePoint
									length:sizeof(littleEndianCodePoint)
								  encoding:NSUTF32LittleEndianStringEncoding];
}
|
||||
|
||||
/// Returns the table mapping named entities (without "&" and ";") to their replacement text.
/// The dictionary is built once, on first use, via dispatch_once.
static NSDictionary *RSEntitiesDictionary(void) {

	static NSDictionary *entitiesDictionary = nil;

	static dispatch_once_t onceToken;
	dispatch_once(&onceToken, ^{

		entitiesDictionary = @{
			// Named entities
			@"AElig": @"Æ",
			@"Aacute": @"Á",
			@"Acirc": @"Â",
			@"Agrave": @"À",
			@"Aring": @"Å",
			@"Atilde": @"Ã",
			@"Auml": @"Ä",
			@"Ccedil": @"Ç",
			@"Dstrok": @"Ð",
			@"ETH": @"Ð",
			@"Eacute": @"É",
			@"Ecirc": @"Ê",
			@"Egrave": @"È",
			@"Euml": @"Ë",
			@"Iacute": @"Í",
			@"Icirc": @"Î",
			@"Igrave": @"Ì",
			@"Iuml": @"Ï",
			@"Ntilde": @"Ñ",
			@"Oacute": @"Ó",
			@"Ocirc": @"Ô",
			@"Ograve": @"Ò",
			@"Oslash": @"Ø",
			@"Otilde": @"Õ",
			@"Ouml": @"Ö",
			@"Pi": @"Π",
			@"THORN": @"Þ",
			@"Uacute": @"Ú",
			@"Ucirc": @"Û",
			@"Ugrave": @"Ù",
			@"Uuml": @"Ü",
			@"Yacute": @"Ý", // Was a plain "Y", which dropped the accent.
			@"aacute": @"á",
			@"acirc": @"â",
			@"acute": @"´",
			@"aelig": @"æ",
			@"agrave": @"à",
			@"amp": @"&",
			@"apos": @"'",
			@"aring": @"å",
			@"atilde": @"ã",
			@"auml": @"ä",
			@"brkbar": @"¦",
			@"brvbar": @"¦",
			@"ccedil": @"ç",
			@"cedil": @"¸",
			@"cent": @"¢",
			@"copy": @"©",
			@"curren": @"¤",
			@"deg": @"°",
			@"die": @"¨",
			@"divide": @"÷",
			@"eacute": @"é",
			@"ecirc": @"ê",
			@"egrave": @"è",
			@"eth": @"ð",
			@"euml": @"ë",
			@"euro": @"€",
			@"frac12": @"½",
			@"frac14": @"¼",
			@"frac34": @"¾",
			@"gt": @">",
			@"hearts": @"♥",
			@"hellip": @"…",
			@"iacute": @"í",
			@"icirc": @"î",
			@"iexcl": @"¡",
			@"igrave": @"ì",
			@"iquest": @"¿",
			@"iuml": @"ï",
			@"laquo": @"«",
			@"ldquo": @"“",
			@"lsquo": @"‘",
			@"lt": @"<",
			@"macr": @"¯",
			@"mdash": @"—",
			@"micro": @"µ",
			@"middot": @"·",
			@"ndash": @"–",
			@"not": @"¬",
			@"ntilde": @"ñ",
			@"oacute": @"ó",
			@"ocirc": @"ô",
			@"ograve": @"ò",
			@"ordf": @"ª",
			@"ordm": @"º",
			@"oslash": @"ø",
			@"otilde": @"õ",
			@"ouml": @"ö",
			@"para": @"¶",
			@"pi": @"π",
			@"plusmn": @"±",
			@"pound": @"£",
			@"quot": @"\"",
			@"raquo": @"»",
			@"rdquo": @"”",
			@"reg": @"®",
			@"rsquo": @"’",
			@"sect": @"§",
			@"shy": RSParserStringWithValue(173),
			@"sup1": @"¹",
			@"sup2": @"²",
			@"sup3": @"³",
			@"szlig": @"ß",
			@"thorn": @"þ",
			@"times": @"×",
			@"trade": @"™",
			@"uacute": @"ú",
			@"ucirc": @"û",
			@"ugrave": @"ù",
			@"uml": @"¨",
			@"uuml": @"ü",
			@"yacute": @"ý", // Was a plain "y", which dropped the accent.
			@"yen": @"¥",
			@"yuml": @"ÿ",
			@"infin": @"∞",
			@"nbsp": RSParserStringWithValue(160)
		};
	});

	return entitiesDictionary;
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user