From 4b646e42c29b25ebe71dfa5cf053905bfed78ca6 Mon Sep 17 00:00:00 2001
From: Brent Simmons
Date: Sun, 18 Aug 2024 17:25:29 -0700
Subject: [PATCH 01/88] Write first version of Swift-based SAXParser.

---
 Mac/AppDelegate.swift                         |   3 +
 Modules/Parser/Sources/Parser/SAXParser.swift | 234 ++++++++++++++++++
 .../Tests/ParserTests/RSDateParserTests.swift |   1 +
 iOS/AppDelegate.swift                         |   2 +
 4 files changed, 240 insertions(+)
 create mode 100644 Modules/Parser/Sources/Parser/SAXParser.swift

diff --git a/Mac/AppDelegate.swift b/Mac/AppDelegate.swift
index d10af9671..22d92cc98 100644
--- a/Mac/AppDelegate.swift
+++ b/Mac/AppDelegate.swift
@@ -17,6 +17,7 @@ import OSLog
 import Core
 import CrashReporter
 import Images
+import libxml2
 
 // If we're not going to import Sparkle, provide dummy protocols to make it easy
 // for AppDelegate to comply
@@ -104,6 +105,8 @@ import Sparkle
 
 	override init() {
 
+		xmlInitParser()
+
 		NSWindow.allowsAutomaticWindowTabbing = false
 		super.init()
 
diff --git a/Modules/Parser/Sources/Parser/SAXParser.swift b/Modules/Parser/Sources/Parser/SAXParser.swift
new file mode 100644
index 000000000..ce9bbf885
--- /dev/null
+++ b/Modules/Parser/Sources/Parser/SAXParser.swift
@@ -0,0 +1,234 @@
+//
+//  SAXParser.swift.
+//
+//
+//  Created by Brent Simmons on 8/12/24.
+//
+
+import Foundation
+import libxml2
+
+/// Pointer to libxml2 character data (`xmlChar`), as handed to the SAX callbacks.
+typealias XMLPointer = UnsafePointer<xmlChar>
+
+/// Receives the SAX events that `SAXParser` forwards while parsing.
+protocol SAXParserDelegate {
+
+	func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer<XMLPointer?>?)
+
+	func saxParser(_: SAXParser, xmlEndElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?)
+
+	func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int)
+
+	/// Return the string to use for this attribute name, or nil to have `attributesDictionary` create one.
+	func saxParser(_: SAXParser, internedStringForName: XMLPointer, prefix: XMLPointer?) -> String?
+
+	/// Return the string to use for this attribute value, or nil to have `attributesDictionary` create one.
+	func saxParser(_: SAXParser, internedStringForValue: XMLPointer, count: Int) -> String?
+}
+
+/// Feeds a block of XML data through libxml2's push parser and forwards the SAX callbacks to a delegate.
+final class SAXParser {
+
+	fileprivate let delegate: SAXParserDelegate
+
+	/// Characters collected since `beginStoringCharacters()` was called, or nil when not storing. UTF-8 encoded.
+	var currentCharacters: Data? {
+
+		guard storingCharacters else {
+			return nil
+		}
+		return characters
+	}
+
+	// Conveniences to get string version of currentCharacters
+
+	var currentString: String? {
+
+		guard let d = currentCharacters, !d.isEmpty else {
+			return nil
+		}
+		return String(data: d, encoding: .utf8)
+	}
+
+	var currentStringWithTrimmedWhitespace: String? {
+
+		currentString?.trimmingCharacters(in: .whitespacesAndNewlines)
+	}
+
+	private let data: Data
+	private var storingCharacters = false
+	private var characters = Data()
+
+	init(delegate: SAXParser Delegate, data: Data) {
+
+		self.delegate = delegate
+		self.data = data
+	}
+
+	/// Parses `data` in one chunk, calling the delegate as elements and characters are found. Does nothing when `data` is empty or a parser context can't be created.
+	func parse() {
+
+		guard !data.isEmpty else {
+			return
+		}
+
+		// xmlCreatePushParserCtxt returns nil on failure; there is nothing to parse with or free in that case.
+		guard let context = xmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil) else {
+			return
+		}
+		defer {
+			xmlFreeParserCtxt(context)
+		}
+
+		// NOTE(review): XML_PARSE_NOENT turns on entity substitution; confirm that is wanted for untrusted feeds.
+		xmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NOENT.rawValue))
+
+		data.withUnsafeBytes { bufferPointer in
+			if let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress {
+				xmlParseChunk(context, bytes, CInt(bufferPointer.count), 0)
+			}
+		}
+
+		// A final, empty chunk with terminate == 1 tells libxml2 the document is complete.
+		xmlParseChunk(context, nil, 0, 1)
+	}
+
+	/// Delegate can call from xmlStartElement. Characters will be available in xmlEndElement as currentCharacters property. Storing characters is stopped after each xmlEndElement.
+	func beginStoringCharacters() {
+
+		storingCharacters = true
+		characters.count = 0
+	}
+
+	func endStoringCharacters() {
+
+		storingCharacters = false
+		characters.count = 0
+	}
+
+	/// Builds a dictionary from the attributes array that libxml2 passes to the start-element callback.
+	///
+	/// Returns nil when there are no attributes. Attributes with missing pointers, or whose value is not valid UTF-8, are left out.
+	func attributesDictionary(_ attributes: UnsafePointer<XMLPointer?>?, attributeCount: Int) -> [String: String]? {
+
+		guard attributeCount > 0, let attributes else {
+			return nil
+		}
+
+		var dictionary = [String: String]()
+
+		// Each attribute occupies five consecutive pointers; indexes 0, 1, 3 and 4 are name, prefix, value start and value end.
+		let fieldCount = 5
+
+		for attributeIndex in 0..<attributeCount {
+
+			let base = attributeIndex * fieldCount
+
+			// Skip an incomplete entry. The loop still advances, so a nil pointer can no longer stall parsing.
+			guard let attribute = attributes[base], let valueStart = attributes[base + 3], let valueEnd = attributes[base + 4] else {
+				continue
+			}
+			let valueCount = valueEnd - valueStart
+			guard valueCount >= 0 else {
+				continue
+			}
+
+			let prefix = attributes[base + 1]
+			let attributeName = delegate.saxParser(self, internedStringForName: attribute, prefix: prefix) ?? Self.qualifiedName(attribute, prefix: prefix)
+
+			let value = delegate.saxParser(self, internedStringForValue: valueStart, count: valueCount) ?? String(bytes: UnsafeRawBufferPointer(start: valueStart, count: valueCount), encoding: .utf8)
+
+			if let value {
+				dictionary[attributeName] = value
+			}
+		}
+
+		return dictionary
+	}
+}
+
+private extension SAXParser {
+
+	/// Builds "prefix:name" (or just "name" when there is no prefix) from C strings.
+	static func qualifiedName(_ name: XMLPointer, prefix: XMLPointer?) -> String {
+
+		let localName = String(cString: name)
+		guard let prefix else {
+			return localName
+		}
+		return "\(String(cString: prefix)):\(localName)"
+	}
+
+	func charactersFound(_ xmlCharacters: XMLPointer, count: Int) {
+
+		if storingCharacters {
+			characters.append(xmlCharacters, count: count)
+		}
+
+		delegate.saxParser(self, xmlCharactersFound: xmlCharacters, count: count)
+	}
+
+	func startElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer<XMLPointer?>?) {
+
+		delegate.saxParser(self, xmlStartElement: name, prefix: prefix, uri: uri, namespaceCount: namespaceCount, namespaces: namespaces, attributeCount: attributeCount, attributesDefaultedCount: attributesDefaultedCount, attributes: attributes)
+	}
+
+	func endElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {
+
+		delegate.saxParser(self, xmlEndElement: name, prefix: prefix, uri: uri)
+		endStoringCharacters()
+	}
+}
+
+// MARK: - libxml2 callbacks
+
+private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer<XMLPointer?>?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer<XMLPointer?>?) {
+
+	guard let context, let name else {
+		return
+	}
+
+	let saxParser = parser(from: context)
+	saxParser.startElement(name, prefix: prefix, uri: URI, namespaceCount: Int(nb_namespaces), namespaces: namespaces, attributeCount: Int(nb_attributes), attributesDefaultedCount: Int(nb_defaulted), attributes: attributes)
+}
+
+private func endElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?) {
+
+	guard let context, let name else {
+		return
+	}
+
+	let saxParser = parser(from: context)
+	saxParser.endElement(name, prefix: prefix, uri: URI)
+}
+
+private func charactersFound(_ context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) {
+
+	guard let context, let ch, len > 0 else {
+		return
+	}
+
+	let saxParser = parser(from: context)
+	saxParser.charactersFound(ch, count: Int(len))
+}
+
+/// Recovers the SAXParser that parse() handed to libxml2 as the callback context.
+private func parser(from context: UnsafeMutableRawPointer) -> SAXParser {
+
+	Unmanaged<SAXParser>.fromOpaque(context).takeUnretainedValue()
+}
+
+nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = {
+
+	var handler = xmlSAXHandler()
+
+	handler.characters = charactersFound
+	// The element callbacks use the namespace-aware SAX2 signatures; libxml2 only calls them when initialized is XML_SAX2_MAGIC.
+	handler.startElementNs = startElement
+	handler.endElementNs = endElement
+	handler.initialized = XML_SAX2_MAGIC
+
+	return handler
+}()
+
diff --git a/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift b/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift
index d12df35ad..e9984933c 100644
--- a/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift
+++ b/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift
@@ -8,6 +8,7 @@
 import Foundation
 import XCTest
 import Parser
+import ParserObjC
 
 class RSDateParserTests: XCTestCase {
 
diff --git
a/iOS/AppDelegate.swift b/iOS/AppDelegate.swift index 7a6dc68a7..56bda163d 100644 --- a/iOS/AppDelegate.swift +++ b/iOS/AppDelegate.swift @@ -55,6 +55,8 @@ class AppDelegate: UIResponder, UIApplicationDelegate, UNUserNotificationCenterD override init() { + xmlInitParser() + super.init() appDelegate = self From d13f0f48edf54d16fe878653ab8e2d56e6a26c47 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 18 Aug 2024 18:18:25 -0700 Subject: [PATCH 02/88] Continue porting ParserObjC to Swift. --- Modules/Parser/Package.swift | 5 ++- .../Sources/Parser/Feeds/FeedParser.swift | 1 - .../Sources/Parser/Feeds/FeedType.swift | 3 -- .../Parser/Feeds/JSON/JSONFeedParser.swift | 3 -- .../Parser/Feeds/JSON/RSSInJSONParser.swift | 3 -- .../Sources/Parser/Feeds/ParsedAuthor.swift | 12 +++++++ .../Sources/Parser/Feeds/XML/AtomParser.swift | 4 --- .../Feeds/XML/RSParsedFeedTransformer.swift | 3 -- .../Sources/Parser/Feeds/XML/RSSParser.swift | 1 - .../Sources/Parser/ParserData+Parser.swift | 11 ------ .../Parser/Sources/Parser/ParserData.swift | 19 +++++++++++ .../Parser/RSHTMLMetadata+Parser.swift | 1 - .../Sources/ParserObjC/FeedParser.h | 24 ------------- .../Sources/ParserObjC/ParserData.h | 24 ------------- .../Sources/ParserObjC/ParserData.m | 26 -------------- .../Sources/ParserObjC/RSParsedAuthor.h | 19 ----------- .../Sources/ParserObjC/RSParsedAuthor.m | 34 ------------------- 17 files changed, 33 insertions(+), 160 deletions(-) delete mode 100644 Modules/Parser/Sources/Parser/ParserData+Parser.swift create mode 100644 Modules/Parser/Sources/Parser/ParserData.swift delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/FeedParser.h delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/ParserData.h delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/ParserData.m delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.h delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.m diff --git a/Modules/Parser/Package.swift 
b/Modules/Parser/Package.swift index 3d831e4d8..6d7320717 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -14,20 +14,19 @@ let package = Package( targets: ["Parser"]), ], dependencies: [ - .package(path: "../ParserObjC"), ], targets: [ // Targets are the basic building blocks of a package. A target can define a module or a test suite. // Targets can depend on other targets in this package, and on products in packages this package depends on. .target( name: "Parser", - dependencies: ["ParserObjC"], + dependencies: [], swiftSettings: [ .enableExperimentalFeature("StrictConcurrency") ]), .testTarget( name: "ParserTests", - dependencies: ["Parser", "ParserObjC"], + dependencies: ["Parser"], exclude: ["Info.plist"], resources: [.copy("Resources")]), ] diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift b/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift index 4c5eb3338..a9417e31f 100644 --- a/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift @@ -7,7 +7,6 @@ // import Foundation -import ParserObjC // FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. // You don’t need to know the type of feed. 
diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedType.swift b/Modules/Parser/Sources/Parser/Feeds/FeedType.swift
index 6638b6543..4dcaaa02c 100644
--- a/Modules/Parser/Sources/Parser/Feeds/FeedType.swift
+++ b/Modules/Parser/Sources/Parser/Feeds/FeedType.swift
@@ -7,9 +7,6 @@
 //
 
 import Foundation
-#if SWIFT_PACKAGE
-import ParserObjC
-#endif
 
 public enum FeedType: Sendable {
 	case rss
diff --git a/Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift
index 733f0f92e..0e765961d 100644
--- a/Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift
+++ b/Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift
@@ -7,9 +7,6 @@
 //
 
 import Foundation
-#if SWIFT_PACKAGE
-import ParserObjC
-#endif
 
 // See https://jsonfeed.org/version/1.1
 
diff --git a/Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift
index ad484f6b9..74e6b0658 100644
--- a/Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift
+++ b/Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift
@@ -7,9 +7,6 @@
 //
 
 import Foundation
-#if SWIFT_PACKAGE
-import ParserObjC
-#endif
 
 // See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
 // Also: http://cyber.harvard.edu/rss/rss.html
diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift b/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift
index 7b7d5165e..3b97cba59 100644
--- a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift
+++ b/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift
@@ -22,6 +22,18 @@ public struct ParsedAuthor: Hashable, Codable, Sendable {
 		self.emailAddress = emailAddress
 	}
 
+	/// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.)
+	init(singleString: String) {
+
+		if singleString.contains("@") { // Looks like an email address.
+			self.init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString)
+		} else if singleString.lowercased().hasPrefix("http") { // Looks like a URL, whatever its letter case.
+			self.init(name: nil, url: singleString, avatarURL: nil, emailAddress: nil)
+		} else { // Anything else is taken to be a name.
+			self.init(name: singleString, url: nil, avatarURL: nil, emailAddress: nil)
+		}
+	}
+
 	// MARK: - Hashable
 
 	public func hash(into hasher: inout Hasher) {
diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift
index 93e01dcd3..151349af7 100644
--- a/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift
+++ b/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift
@@ -8,10 +8,6 @@
 
 import Foundation
 
-#if SWIFT_PACKAGE
-import ParserObjC
-#endif
-
 // RSSParser wraps the Objective-C RSAtomParser.
 //
 // The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift b/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift
index 27a5772c3..c6d0b2ba6 100644
--- a/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift
+++ b/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift
@@ -7,9 +7,6 @@
 //
 
 import Foundation
-#if SWIFT_PACKAGE
-import ParserObjC
-#endif
 
 // RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
 // They create an RSParsedFeed object and related Objective-C objects.
diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift
index 85b88d83f..885790e16 100644
--- a/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift
+++ b/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift
@@ -7,7 +7,6 @@
 //
 
 import Foundation
-import ParserObjC
 
 // RSSParser wraps the Objective-C RSRSSParser.
 //
diff --git a/Modules/Parser/Sources/Parser/ParserData+Parser.swift b/Modules/Parser/Sources/Parser/ParserData+Parser.swift
deleted file mode 100644
index 1563bafd9..000000000
--- a/Modules/Parser/Sources/Parser/ParserData+Parser.swift
+++ /dev/null
@@ -1,11 +0,0 @@
-//
-//  File.swift
-//
-//
-//  Created by Brent Simmons on 4/7/24.
-//
-
-import Foundation
-import ParserObjC
-
-extension ParserData: @unchecked Sendable {}
diff --git a/Modules/Parser/Sources/Parser/ParserData.swift b/Modules/Parser/Sources/Parser/ParserData.swift
new file mode 100644
index 000000000..1ef7e822a
--- /dev/null
+++ b/Modules/Parser/Sources/Parser/ParserData.swift
@@ -0,0 +1,19 @@
+//
+//  ParserData.swift
+//
+//
+//  Created by Brent Simmons on 8/18/24.
+//
+
+import Foundation
+
+public struct ParserData: Sendable {
+
+	let url: String
+	let data: Data
+
+	public init(url: String, data: Data) {
+		self.url = url
+		self.data = data
+	}
+}
diff --git a/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift b/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift
index de80eb2ce..391380b22 100644
--- a/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift
+++ b/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift
@@ -6,6 +6,5 @@
 //
 
 import Foundation
-import ParserObjC
 
 extension RSHTMLMetadataParser: @unchecked Sendable {}
diff --git a/Modules/ParserObjC/Sources/ParserObjC/FeedParser.h b/Modules/ParserObjC/Sources/ParserObjC/FeedParser.h
deleted file mode 100755
index 0f8df6b07..000000000
--- a/Modules/ParserObjC/Sources/ParserObjC/FeedParser.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//
-//  FeedParser.h
-//  RSXML
-//
-//  Created by Brent Simmons on 7/12/15.
-//  Copyright © 2015 Ranchero Software, LLC. All rights reserved.
-// - -@import Foundation; - -@class RSParsedFeed; -@class RSXMLData; - - -@protocol FeedParser - -+ (BOOL)canParseFeed:(RSXMLData * _Nonnull)xmlData; - -- (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData; - -- (nullable RSParsedFeed *)parseFeed:(NSError * _Nullable * _Nullable)error; - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/ParserData.h b/Modules/ParserObjC/Sources/ParserObjC/ParserData.h deleted file mode 100644 index fe4885144..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/ParserData.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// ParserData.h -// RSParser -// -// Created by Brent Simmons on 10/4/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -__attribute__((swift_attr("@Sendable"))) -@interface ParserData : NSObject - -@property (nonatomic, readonly) NSString *url; -@property (nonatomic, readonly) NSData *data; - -- (instancetype)initWithURL:(NSString *)url data:(NSData *)data; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/ParserData.m b/Modules/ParserObjC/Sources/ParserObjC/ParserData.m deleted file mode 100644 index 68c5f0356..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/ParserData.m +++ /dev/null @@ -1,26 +0,0 @@ -// -// ParserData.m -// RSParser -// -// Created by Brent Simmons on 10/4/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -#import "ParserData.h" - -@implementation ParserData - -- (instancetype)initWithURL:(NSString *)url data:(NSData *)data { - - self = [super init]; - if (!self) { - return nil; - } - - _url = url; - _data = data; - - return self; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.h deleted file mode 100644 index 2c28236a2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// RSParsedAuthor.h -// RSParserTests -// -// Created by Brent Simmons on 12/19/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -@interface RSParsedAuthor : NSObject - -@property (nonatomic, nullable) NSString *name; -@property (nonatomic, nullable) NSString *emailAddress; -@property (nonatomic, nullable) NSString *url; - -+ (instancetype _Nonnull )authorWithSingleString:(NSString *_Nonnull)s; // Don’t know which property it is. Guess based on contents of the string. Common with RSS. - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.m deleted file mode 100644 index 154b546c8..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedAuthor.m +++ /dev/null @@ -1,34 +0,0 @@ -// -// RSParsedAuthor.m -// RSParserTests -// -// Created by Brent Simmons on 12/19/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -#import "NSString+RSParser.h" - -#import "RSParsedAuthor.h" - -@implementation RSParsedAuthor - -+ (instancetype)authorWithSingleString:(NSString *)s { - - // The author element in RSS is supposed to be email address — but often it’s a name, and sometimes a URL. 
- - RSParsedAuthor *author = [[self alloc] init]; - - if ([s rsparser_contains:@"@"]) { - author.emailAddress = s; - } - else if ([s.lowercaseString hasPrefix:@"http"]) { - author.url = s; - } - else { - author.name = s; - } - - return author; -} - -@end From 5ec0964594515201bd531291c90edd3a1f30cbe6 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 18 Aug 2024 18:58:53 -0700 Subject: [PATCH 03/88] Port RSHTMLTag and RSOMLFeedSpecifier to Swift. --- .../Parser/Sources/Parser/HTML/HTMLTag.swift | 24 +++++++++ .../Parser/OPML/OPMLFeedSpecifier.swift | 40 +++++++++++++++ .../Parser/Utilities/String+RSParser.swift | 6 +++ .../ParserObjC/Sources/ParserObjC/RSHTMLTag.h | 33 ------------ .../ParserObjC/Sources/ParserObjC/RSHTMLTag.m | 43 ---------------- .../Sources/ParserObjC/RSOPMLFeedSpecifier.h | 24 --------- .../Sources/ParserObjC/RSOPMLFeedSpecifier.m | 51 ------------------- 7 files changed, 70 insertions(+), 151 deletions(-) create mode 100644 Modules/Parser/Sources/Parser/HTML/HTMLTag.swift create mode 100644 Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.h delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.m diff --git a/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift b/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift new file mode 100644 index 000000000..27acc83aa --- /dev/null +++ b/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift @@ -0,0 +1,24 @@ +// +// HTMLTag.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public struct HTMLTag: Sendable { + + public enum HTMLTagType { + case link + case meta + } + + public let tagType: HTMLTagType + public let attributes: [String: String]? + + public init(tagType: TagType, attributes: [String : String]?) 
{ + self.tagType = tagType + self.attributes = attributes + } +} diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift b/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift new file mode 100644 index 000000000..ee6b1b456 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift @@ -0,0 +1,40 @@ +// +// File.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public struct OPMLFeedSpecifier: Sendable { + + let title: String? + let feedDescription: String? + let homePageURL: String? + let feedURL: String + + init(title: String?, feedDescription: String?, homePageURL: String?, feedURL: String) { + + if String.isEmptyOrNil(title) { + self.title = nil + } else { + self.title = title + } + + if String.isEmptyOrNil(feedDescription) { + self.feedDescription = nil + } else { + self.feedDescription = feedDescription + } + + if String.isEmptyOrNil(homePageURL) { + self.homePageURL = nil + } else { + self.homePageURL = homePageURL + } + + self.feedURL = feedURL + } +} + diff --git a/Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift b/Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift index 9922be93a..9ed9c0b51 100644 --- a/Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift +++ b/Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift @@ -14,4 +14,10 @@ extension String { return self.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : self } + static func isEmptyOrNil(_ s: String?) { + if let s { + return s.isEmpty + } + return true + } } diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.h deleted file mode 100644 index e8e9cb426..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.h +++ /dev/null @@ -1,33 +0,0 @@ -// -// RSHTMLTag.h -// RSParser -// -// Created by Brent Simmons on 11/26/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -extern NSString *RSHTMLTagNameLink; // @"link" -extern NSString *RSHTMLTagNameMeta; // @"meta" - -typedef NS_ENUM(NSInteger, RSHTMLTagType) { - RSHTMLTagTypeLink, - RSHTMLTagTypeMeta -}; - -@interface RSHTMLTag : NSObject - -- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes; - -+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes; -+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes; - -@property (nonatomic, readonly) RSHTMLTagType type; -@property (nonatomic, readonly) NSDictionary *attributes; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.m deleted file mode 100644 index 5b0262ffb..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLTag.m +++ /dev/null @@ -1,43 +0,0 @@ -// -// RSHTMLTag.m -// RSParser -// -// Created by Brent Simmons on 11/26/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSHTMLTag.h" - -NSString *RSHTMLTagNameLink = @"link"; -NSString *RSHTMLTagNameMeta = @"meta"; - -@implementation RSHTMLTag - -- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes { - - self = [super init]; - if (!self) { - return nil; - } - - _type = type; - _attributes = attributes; - - return self; -} - -+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes { - - return [[self alloc] initWithType:RSHTMLTagTypeLink attributes:attributes]; -} - -+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes { - - return [[self alloc] initWithType:RSHTMLTagTypeMeta attributes:attributes]; -} - -- (NSString *)description { - return [NSString stringWithFormat:@"<%@: %p> type: %ld attributes: %@", NSStringFromClass([self class]), self, (long)self.type, self.attributes]; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.h deleted file mode 100755 index 8c4aea6b0..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSOPMLFeedSpecifier.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSOPMLFeedSpecifier : NSObject - -- (instancetype)initWithTitle:(NSString * _Nullable)title feedDescription:(NSString * _Nullable)feedDescription homePageURL:(NSString * _Nullable)homePageURL feedURL:(NSString *)feedURL; - -@property (nonatomic, nullable, readonly) NSString *title; -@property (nonatomic, nullable, readonly) NSString *feedDescription; -@property (nonatomic, nullable, readonly) NSString *homePageURL; -@property (nonatomic, readonly) NSString *feedURL; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.m deleted file mode 100755 index bb32ccf54..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLFeedSpecifier.m +++ /dev/null @@ -1,51 +0,0 @@ -// -// RSOPMLFeedSpecifier.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSOPMLFeedSpecifier.h" -#import "RSParserInternal.h" - - - -@implementation RSOPMLFeedSpecifier - -- (instancetype)initWithTitle:(NSString *)title feedDescription:(NSString *)feedDescription homePageURL:(NSString *)homePageURL feedURL:(NSString *)feedURL { - - NSParameterAssert(!RSParserStringIsEmpty(feedURL)); - - self = [super init]; - if (!self) { - return nil; - } - - if (RSParserStringIsEmpty(title)) { - _title = nil; - } - else { - _title = title; - } - - if (RSParserStringIsEmpty(feedDescription)) { - _feedDescription = nil; - } - else { - _feedDescription = feedDescription; - } - - if (RSParserStringIsEmpty(homePageURL)) { - _homePageURL = nil; - } - else { - _homePageURL = homePageURL; - } - - _feedURL = feedURL; - - return self; -} - -@end From 1aaad155ddcd974871fb295bf8dfc0fc5842db2a Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 18 Aug 2024 19:00:03 -0700 Subject: [PATCH 04/88] =?UTF-8?q?Delete=20RSParsedEnclosure=20=E2=80=94?= =?UTF-8?q?=C2=A0will=20use=20ParsedAttachment=20instead.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Sources/ParserObjC/RSParsedEnclosure.h | 22 ------------------- .../Sources/ParserObjC/RSParsedEnclosure.m | 13 ----------- 2 files changed, 35 deletions(-) delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.h delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.m diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.h deleted file mode 100644 index 8fc9e404d..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.h +++ /dev/null @@ -1,22 +0,0 @@ -// -// RSParsedEnclosure.h -// RSParser -// -// Created by Brent Simmons on 12/18/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSParsedEnclosure : NSObject - -@property (nonatomic) NSString *url; -@property (nonatomic) NSInteger length; -@property (nonatomic, nullable) NSString *mimeType; -@property (nonatomic, nullable) NSString *title; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.m deleted file mode 100644 index f6f35da59..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedEnclosure.m +++ /dev/null @@ -1,13 +0,0 @@ -// -// RSParsedEnclosure.m -// RSParser -// -// Created by Brent Simmons on 12/18/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -#import "RSParsedEnclosure.h" - -@implementation RSParsedEnclosure - -@end From 95ef280698259e1547964e798be8f4223b7ed4fb Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 18 Aug 2024 19:01:32 -0700 Subject: [PATCH 05/88] Delete not-really-needed RSOPMLError. --- .../Sources/ParserObjC/RSOPMLError.h | 19 ---------------- .../Sources/ParserObjC/RSOPMLError.m | 22 ------------------- 2 files changed, 41 deletions(-) delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.m diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.h deleted file mode 100755 index 276c62ed7..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// RSOPMLError.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -extern NSString *RSOPMLErrorDomain; - - -typedef NS_ENUM(NSInteger, RSOPMLErrorCode) { - RSOPMLErrorCodeDataIsWrongFormat = 1024 -}; - - -NSError *RSOPMLWrongFormatError(NSString *fileName); diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.m deleted file mode 100755 index 7aa3c5e9d..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLError.m +++ /dev/null @@ -1,22 +0,0 @@ -// -// RSOPMLError.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSOPMLError.h" - -NSString *RSOPMLErrorDomain = @"com.ranchero.OPML"; - -NSError *RSOPMLWrongFormatError(NSString *fileName) { - - NSString *localizedDescriptionFormatString = NSLocalizedString(@"The file ‘%@’ can’t be parsed because it’s not an OPML file.", @"OPML wrong format"); - NSString *localizedDescription = [NSString stringWithFormat:localizedDescriptionFormatString, fileName]; - - NSString *localizedFailureString = NSLocalizedString(@"The file is not an OPML file.", @"OPML wrong format"); - NSDictionary *userInfo = @{NSLocalizedDescriptionKey: localizedDescription, NSLocalizedFailureReasonErrorKey: localizedFailureString}; - - return [[NSError alloc] initWithDomain:RSOPMLErrorDomain code:RSOPMLErrorCodeDataIsWrongFormat userInfo:userInfo]; -} From ad00ee52ff6c151a870b5ae4627ab6c3b20f9da6 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 18 Aug 2024 21:03:26 -0700 Subject: [PATCH 06/88] Port RSOPMLAttributes and RSOPMLItem to Swift. 
--- .../Sources/Parser/OPML/OPMLAttributes.swift | 53 +++++++++++ .../Parser/Sources/Parser/OPML/OPMLItem.swift | 42 +++++++++ .../Parser/Utilities/Dictionary+Parser.swift | 28 ++++++ ...ing+RSParser.swift => String+Parser.swift} | 0 .../Sources/ParserObjC/RSOPMLAttributes.h | 36 -------- .../Sources/ParserObjC/RSOPMLAttributes.m | 68 --------------- .../Sources/ParserObjC/RSOPMLItem.h | 30 ------- .../Sources/ParserObjC/RSOPMLItem.m | 87 ------------------- 8 files changed, 123 insertions(+), 221 deletions(-) create mode 100644 Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift create mode 100644 Modules/Parser/Sources/Parser/OPML/OPMLItem.swift create mode 100644 Modules/Parser/Sources/Parser/Utilities/Dictionary+Parser.swift rename Modules/Parser/Sources/Parser/Utilities/{String+RSParser.swift => String+Parser.swift} (100%) delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.m diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift b/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift new file mode 100644 index 000000000..0669e0e53 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift @@ -0,0 +1,53 @@ +// +// OPMLAttributes.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +// OPML allows for arbitrary attributes. +// These are the common attributes in OPML files used as RSS subscription lists. 
+ +private static let opmlTextKey = "text" +private static let opmlTitleKey = "title" +private static let opmlDescriptionKey = "description" +private static let opmlTypeKey = "type" +private static let opmlVersionKey = "version" +private static let opmlHMTLURLKey = "htmlUrl" +private static let opmlXMLURLKey = "xmlUrl" + +// A frequent error in OPML files is to mess up the capitalization, +// so these do a case-insensitive lookup. + +extension Dictionary where Key == String, Value == String { + + var opml_text: String? { + object(forCaseInsensitiveKey: opmlTextKey) + } + + var opml_title: String? { + object(forCaseInsensitiveKey: opmlTitleKey) + } + + var opml_description: String? { + object(forCaseInsensitiveKey: opmlDescriptionKey) + } + + var opml_type: String? { + object(forCaseInsensitiveKey: opmlTypeKey) + } + + var opml_version: String? { + object(forCaseInsensitiveKey: opmlVersionKey) + } + + var opml_htmlUrl: String? { + object(forCaseInsensitiveKey: opmlHMTLURLKey) + } + + var opml_xmlUrl: String? { + object(forCaseInsensitiveKey: opmlXMLURLKey) + } +} diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift new file mode 100644 index 000000000..76a170060 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift @@ -0,0 +1,42 @@ +// +// OPMLItem.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation +import os + +public final class OPMLItem: Sendable { + + public let feedSpecifier: OPMLFeedSpecifier + + public let attributes: [String: String] + public let titleFromAttributes: String? + + public let isFolder: Bool + public let children: [OPMLItem]? + + init?(attributes: [String : String], children: [OPMLItem]?) 
{ + + guard let feedURL = attributes.opml_xmlUrl, !feedURL.isEmpty else { + return nil + } + + let titleFromAttributes = { + if let title = attributes.opml_title { + return title + } + return attributes.opml_text + }() + self.titleFromAttributes = titleFromAttributes + + self.feedSpecifier = OPMLFeedSpecifier(title: titleFromAttributes, feedDescription: attributes.opml_description, homePageURL: attributes.opml_htmlUrl, feedURL: feedURL) + + self.attributes = attributes + + self.children = children + self.isFolder = (children?.count ?? 0) > 0 + } +} diff --git a/Modules/Parser/Sources/Parser/Utilities/Dictionary+Parser.swift b/Modules/Parser/Sources/Parser/Utilities/Dictionary+Parser.swift new file mode 100644 index 000000000..828316931 --- /dev/null +++ b/Modules/Parser/Sources/Parser/Utilities/Dictionary+Parser.swift @@ -0,0 +1,28 @@ +// +// Dictionary+Parser.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +extension Dictionary where Key == String, Value == String { + + func object(forCaseInsensitiveKey key: String) -> String? { + + if let object = self[key] { + return object + } + + let lowercaseKey = key.lowercased() + + for (oneKey, oneValue) in self { + if lowercaseKey.caseInsensitiveCompare(oneKey) == .orderedSame { + return oneValue + } + } + + return nil + } +} diff --git a/Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift b/Modules/Parser/Sources/Parser/Utilities/String+Parser.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Utilities/String+RSParser.swift rename to Modules/Parser/Sources/Parser/Utilities/String+Parser.swift diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.h deleted file mode 100755 index 688132fe5..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.h +++ /dev/null @@ -1,36 +0,0 @@ -// -// RSOPMLAttributes.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. 
-// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -// OPML allows for arbitrary attributes. -// These are the common attributes in OPML files used as RSS subscription lists. - -extern NSString *OPMLTextKey; //text -extern NSString *OPMLTitleKey; //title -extern NSString *OPMLDescriptionKey; //description -extern NSString *OPMLTypeKey; //type -extern NSString *OPMLVersionKey; //version -extern NSString *OPMLHMTLURLKey; //htmlUrl -extern NSString *OPMLXMLURLKey; //xmlUrl - - -@interface NSDictionary (RSOPMLAttributes) - -// A frequent error in OPML files is to mess up the capitalization, -// so these do a case-insensitive lookup. - -@property (nonatomic, readonly) NSString *opml_text; -@property (nonatomic, readonly) NSString *opml_title; -@property (nonatomic, readonly) NSString *opml_description; -@property (nonatomic, readonly) NSString *opml_type; -@property (nonatomic, readonly) NSString *opml_version; -@property (nonatomic, readonly) NSString *opml_htmlUrl; -@property (nonatomic, readonly) NSString *opml_xmlUrl; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.m deleted file mode 100755 index db6508b24..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLAttributes.m +++ /dev/null @@ -1,68 +0,0 @@ -// -// RSOPMLAttributes.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSOPMLAttributes.h" -#import "RSParserInternal.h" - - - - -NSString *OPMLTextKey = @"text"; -NSString *OPMLTitleKey = @"title"; -NSString *OPMLDescriptionKey = @"description"; -NSString *OPMLTypeKey = @"type"; -NSString *OPMLVersionKey = @"version"; -NSString *OPMLHMTLURLKey = @"htmlUrl"; -NSString *OPMLXMLURLKey = @"xmlUrl"; - - -@implementation NSDictionary (RSOPMLAttributes) - -- (NSString *)opml_text { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLTextKey]; -} - - -- (NSString *)opml_title { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLTitleKey]; -} - - -- (NSString *)opml_description { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLDescriptionKey]; -} - - -- (NSString *)opml_type { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLTypeKey]; -} - - -- (NSString *)opml_version { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLVersionKey]; -} - - -- (NSString *)opml_htmlUrl { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLHMTLURLKey]; -} - - -- (NSString *)opml_xmlUrl { - - return [self rsparser_objectForCaseInsensitiveKey:OPMLXMLURLKey]; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.h deleted file mode 100755 index 15afa48f2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.h +++ /dev/null @@ -1,30 +0,0 @@ -// -// RSOPMLItem.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -@class RSOPMLFeedSpecifier; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSOPMLItem : NSObject - -@property (nonatomic, nullable) NSDictionary *attributes; -@property (nonatomic, nullable) NSArray *children; - -- (void)addChild:(RSOPMLItem *)child; - -@property (nonatomic, nullable, readonly) RSOPMLFeedSpecifier *feedSpecifier; - -@property (nonatomic, nullable, readonly) NSString *titleFromAttributes; -@property (nonatomic, readonly) BOOL isFolder; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.m deleted file mode 100755 index a273cd317..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLItem.m +++ /dev/null @@ -1,87 +0,0 @@ -// -// RSOPMLItem.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSOPMLItem.h" -#import "RSOPMLAttributes.h" -#import "RSOPMLFeedSpecifier.h" -#import "RSParserInternal.h" - - - -@interface RSOPMLItem () - -@property (nonatomic) NSMutableArray *mutableChildren; - -@end - - -@implementation RSOPMLItem - -@synthesize children = _children; -@synthesize feedSpecifier = _feedSpecifier; - - -- (NSArray *)children { - - return [self.mutableChildren copy]; -} - - -- (void)setChildren:(NSArray *)children { - - _children = children; - self.mutableChildren = [_children mutableCopy]; -} - - -- (void)addChild:(RSOPMLItem *)child { - - if (!self.mutableChildren) { - self.mutableChildren = [NSMutableArray new]; - } - - [self.mutableChildren addObject:child]; -} - - -- (RSOPMLFeedSpecifier *)feedSpecifier { - - if (_feedSpecifier) { - return _feedSpecifier; - } - - NSString *feedURL = self.attributes.opml_xmlUrl; - if (RSParserObjectIsEmpty(feedURL)) { - return nil; - } - - _feedSpecifier = [[RSOPMLFeedSpecifier alloc] initWithTitle:self.titleFromAttributes feedDescription:self.attributes.opml_description 
homePageURL:self.attributes.opml_htmlUrl feedURL:feedURL]; - - return _feedSpecifier; -} - -- (NSString *)titleFromAttributes { - - NSString *title = self.attributes.opml_title; - if (title) { - return title; - } - title = self.attributes.opml_text; - if (title) { - return title; - } - - return nil; -} - -- (BOOL)isFolder { - - return self.mutableChildren.count > 0; -} - -@end From 0127fd4d0bff118bb84ad7cdfb804cec24c5d2af Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 18 Aug 2024 21:08:01 -0700 Subject: [PATCH 07/88] Import RSOPMLDocument to Swift. --- .../Sources/Parser/OPML/OPMLDocument.swift | 15 +++++++++++++ .../Parser/Sources/Parser/OPML/OPMLItem.swift | 10 ++++----- .../Sources/ParserObjC/RSOPMLDocument.h | 21 ------------------- .../Sources/ParserObjC/RSOPMLDocument.m | 14 ------------- 4 files changed, 20 insertions(+), 40 deletions(-) create mode 100644 Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.m diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift new file mode 100644 index 000000000..b977b2768 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift @@ -0,0 +1,15 @@ +// +// OPMLDocument.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public struct OPMLDocument: Sendable { + + public let title: String + public let url: String + public let items: [OPMLItem]? 
+} diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift index 76a170060..caff99a38 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift @@ -8,7 +8,7 @@ import Foundation import os -public final class OPMLItem: Sendable { +public struct OPMLItem: Sendable { public let feedSpecifier: OPMLFeedSpecifier @@ -16,9 +16,9 @@ public final class OPMLItem: Sendable { public let titleFromAttributes: String? public let isFolder: Bool - public let children: [OPMLItem]? + public let items: [OPMLItem]? - init?(attributes: [String : String], children: [OPMLItem]?) { + init?(attributes: [String : String], items: [OPMLItem]?) { guard let feedURL = attributes.opml_xmlUrl, !feedURL.isEmpty else { return nil @@ -36,7 +36,7 @@ public final class OPMLItem: Sendable { self.attributes = attributes - self.children = children - self.isFolder = (children?.count ?? 0) > 0 + self.items = items + self.isFolder = (items?.count ?? 0) > 0 } } diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.h deleted file mode 100755 index 5061853fe..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.h +++ /dev/null @@ -1,21 +0,0 @@ -// -// RSOPMLDocument.h -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -#import "RSOPMLItem.h" - - - - -@interface RSOPMLDocument : RSOPMLItem - -@property (nonatomic) NSString *title; -@property (nonatomic) NSString *url; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.m deleted file mode 100755 index 1506bd911..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLDocument.m +++ /dev/null @@ -1,14 +0,0 @@ -// -// RSOPMLDocument.m -// RSParser -// -// Created by Brent Simmons on 2/28/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSOPMLDocument.h" - -@implementation RSOPMLDocument - -@end From 2d3c34f96aa6a55bc4afcbb94d77340ea8cae37e Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 18 Aug 2024 22:07:17 -0700 Subject: [PATCH 08/88] Create public, Sendable ParsedOPML* structs to match what we do with feed parsing. --- .../Sources/Parser/OPML/OPMLDocument.swift | 7 +-- .../Parser/Sources/Parser/OPML/OPMLItem.swift | 19 ++++-- .../Sources/Parser/OPML/OPMLParser.swift | 60 +++++++++++++++++++ .../Parser/OPML/ParsedOPMLDocument.swift | 25 ++++++++ .../Parser/OPML/ParsedOPMLFeedSpecifier.swift | 24 ++++++++ .../Sources/Parser/OPML/ParsedOPMLItem.swift | 31 ++++++++++ 6 files changed, 156 insertions(+), 10 deletions(-) create mode 100644 Modules/Parser/Sources/Parser/OPML/OPMLParser.swift create mode 100644 Modules/Parser/Sources/Parser/OPML/ParsedOPMLDocument.swift create mode 100644 Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift create mode 100644 Modules/Parser/Sources/Parser/OPML/ParsedOPMLItem.swift diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift index b977b2768..647ff215c 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift @@ -7,9 +7,8 @@ import Foundation -public struct OPMLDocument: Sendable { +final class 
OPMLDocument: OPMLItem, @unchecked Sendable { - public let title: String - public let url: String - public let items: [OPMLItem]? + var title: String? + var url: String? } diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift index caff99a38..fba59b526 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift @@ -8,17 +8,19 @@ import Foundation import os -public struct OPMLItem: Sendable { +class OPMLItem: @unchecked Sendable { public let feedSpecifier: OPMLFeedSpecifier public let attributes: [String: String] public let titleFromAttributes: String? - public let isFolder: Bool - public let items: [OPMLItem]? + public var items: [OPMLItem]? + public var isFolder: Bool { + items.count > 0 + } - init?(attributes: [String : String], items: [OPMLItem]?) { + init?(attributes: [String : String]) { guard let feedURL = attributes.opml_xmlUrl, !feedURL.isEmpty else { return nil @@ -35,8 +37,13 @@ public struct OPMLItem: Sendable { self.feedSpecifier = OPMLFeedSpecifier(title: titleFromAttributes, feedDescription: attributes.opml_description, homePageURL: attributes.opml_htmlUrl, feedURL: feedURL) self.attributes = attributes + } + + func addItem(_ item: OPMLItem) { - self.items = items - self.isFolder = (items?.count ?? 0) > 0 + if items == nil { + items = [OPMLItem]() + } + items?.append(item) } } diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift new file mode 100644 index 000000000..ad34ff31b --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift @@ -0,0 +1,60 @@ +// +// OPMLParser.swift +// +// +// Created by Brent Simmons on 8/18/24. 
+// + +import Foundation + +public final class OPMLParser { + + let url: String + let data: Data + + private var itemStack = [OPMLItem]() + + enum OPMLParserError: Error { + case notOPML + } + + init(parserData: ParserData) { + + self.url = parserData.url + self.data = parserData.data + } + + func parse() throws -> OPMLDocument? { + + guard canParseData() else { + throw OPMLParserError.notOPML + } + + let parser = SAXParser(delegate: self, data: data) + + + } +} + +extension OPMLParser: SAXParserDelegate { + + func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer?) { + + } + + func saxParser(_: SAXParser, xmlEndElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + + } + + func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { + + } + + func saxParser(_: SAXParser, internedStringForName: XMLPointer, prefix: XMLPointer?) -> String? { + + } + + func saxParser(_: SAXParser, internedStringForValue: XMLPointer, count: Int) -> String? { + + } +} diff --git a/Modules/Parser/Sources/Parser/OPML/ParsedOPMLDocument.swift b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLDocument.swift new file mode 100644 index 000000000..ee90cd99d --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLDocument.swift @@ -0,0 +1,25 @@ +// +// ParsedOPMLDocument.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public final class ParsedOPMLDocument: Sendable { + + public let title: String? + public let url: String? + public let items: [ParsedOPMLItem]? 
+ + init(opmlDocument: OPMLDocument) { + + self.title = opmlDocument.title + self.url = opmlDocument.url + + self.items = opmlDocument.items.map { opmlItem in + ParsedOPMLItem(opmlItem: opmlItem) + } + } +} diff --git a/Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift new file mode 100644 index 000000000..0d7e574f2 --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift @@ -0,0 +1,24 @@ +// +// ParsedOPMLFeedSpecifier.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public struct ParsedOPMLFeedSpecifier: Sendable { + + public let title: String? + public let feedDescription: String? + public let homePageURL: String? + public let feedURL: String + + init(_ opmlFeedSpecifier: OPMLFeedSpecifier) { + + self.title = opmlFeedSpecifier.title + self.feedDescription = opmlFeedSpecifier.feedDescription + self.homePageURL = opmlFeedSpecifier.homePageURL + self.feedURL = opmlFeedSpecifier.feedURL + } +} diff --git a/Modules/Parser/Sources/Parser/OPML/ParsedOPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLItem.swift new file mode 100644 index 000000000..517a1681f --- /dev/null +++ b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLItem.swift @@ -0,0 +1,31 @@ +// +// File.swift +// +// +// Created by Brent Simmons on 8/18/24. +// + +import Foundation + +public struct ParsedOPMLItem: Sendable { + + public let feedSpecifier: ParsedOPMLFeedSpecifier? + + public let attributes: [String: String]? + public let title: String? + + public var items: [ParsedOPMLItem]? + public var isFolder: Bool + + init(opmlItem: OPMLItem) { + + self.feedSpecifier = ParsedOPMLFeedSpecifier(opmlItem.feedSpecifier) + self.attributes = opmlItem.attributes + self.title = opmlItem.title + + self.items = opmlItem.items.map { opmlItem in + ParsedOPMLItem(opmlItem: opmlItem) + } + self.isFolder = (self.items?.count ?? 
0) > 0 + } +} From 321339186f27b79028409a35c68872cdac010c99 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 23 Aug 2024 18:03:59 -0700 Subject: [PATCH 09/88] Rename OPMLFeedSpecifier to ParsedOPMLFeedSpecifier. --- .../Parser/OPML/OPMLFeedSpecifier.swift | 40 ------------------- .../Parser/OPML/ParsedOPMLFeedSpecifier.swift | 36 ++++++++++++----- 2 files changed, 26 insertions(+), 50 deletions(-) delete mode 100644 Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift b/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift deleted file mode 100644 index ee6b1b456..000000000 --- a/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift +++ /dev/null @@ -1,40 +0,0 @@ -// -// File.swift -// -// -// Created by Brent Simmons on 8/18/24. -// - -import Foundation - -public struct OPMLFeedSpecifier: Sendable { - - let title: String? - let feedDescription: String? - let homePageURL: String? - let feedURL: String - - init(title: String?, feedDescription: String?, homePageURL: String?, feedURL: String) { - - if String.isEmptyOrNil(title) { - self.title = nil - } else { - self.title = title - } - - if String.isEmptyOrNil(feedDescription) { - self.feedDescription = nil - } else { - self.feedDescription = feedDescription - } - - if String.isEmptyOrNil(homePageURL) { - self.homePageURL = nil - } else { - self.homePageURL = homePageURL - } - - self.feedURL = feedURL - } -} - diff --git a/Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift index 0d7e574f2..ccad2b847 100644 --- a/Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift +++ b/Modules/Parser/Sources/Parser/OPML/ParsedOPMLFeedSpecifier.swift @@ -1,6 +1,6 @@ // // ParsedOPMLFeedSpecifier.swift -// +// // // Created by Brent Simmons on 8/18/24. 
// @@ -9,16 +9,32 @@ import Foundation public struct ParsedOPMLFeedSpecifier: Sendable { - public let title: String? - public let feedDescription: String? - public let homePageURL: String? - public let feedURL: String + let title: String? + let feedDescription: String? + let homePageURL: String? + let feedURL: String - init(_ opmlFeedSpecifier: OPMLFeedSpecifier) { + init(title: String?, feedDescription: String?, homePageURL: String?, feedURL: String) { - self.title = opmlFeedSpecifier.title - self.feedDescription = opmlFeedSpecifier.feedDescription - self.homePageURL = opmlFeedSpecifier.homePageURL - self.feedURL = opmlFeedSpecifier.feedURL + if String.isEmptyOrNil(title) { + self.title = nil + } else { + self.title = title + } + + if String.isEmptyOrNil(feedDescription) { + self.feedDescription = nil + } else { + self.feedDescription = feedDescription + } + + if String.isEmptyOrNil(homePageURL) { + self.homePageURL = nil + } else { + self.homePageURL = homePageURL + } + + self.feedURL = feedURL } } + From a3c10165fa75644f1e5b2a5da483df3057dcb6ea Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 23 Aug 2024 18:05:19 -0700 Subject: [PATCH 10/88] Add missing return value. --- Modules/Parser/Sources/Parser/Utilities/String+Parser.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/Parser/Sources/Parser/Utilities/String+Parser.swift b/Modules/Parser/Sources/Parser/Utilities/String+Parser.swift index 9ed9c0b51..61555a365 100644 --- a/Modules/Parser/Sources/Parser/Utilities/String+Parser.swift +++ b/Modules/Parser/Sources/Parser/Utilities/String+Parser.swift @@ -14,7 +14,7 @@ extension String { return self.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : self } - static func isEmptyOrNil(_ s: String?) { + static func isEmptyOrNil(_ s: String?) 
-> Bool { if let s { return s.isEmpty } From b7462c89e00793fb9fe4f1a04e1991bc8433b9e5 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 23 Aug 2024 18:06:18 -0700 Subject: [PATCH 11/88] Continue progress on porting OPML code to Swift. --- .../Sources/Parser/OPML/OPMLAttributes.swift | 14 ++++++------ .../Sources/Parser/OPML/OPMLDocument.swift | 9 ++++++-- .../Parser/Sources/Parser/OPML/OPMLItem.swift | 22 +++++-------------- .../Sources/Parser/OPML/OPMLParser.swift | 4 +++- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift b/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift index 0669e0e53..508dd339a 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift @@ -10,13 +10,13 @@ import Foundation // OPML allows for arbitrary attributes. // These are the common attributes in OPML files used as RSS subscription lists. -private static let opmlTextKey = "text" -private static let opmlTitleKey = "title" -private static let opmlDescriptionKey = "description" -private static let opmlTypeKey = "type" -private static let opmlVersionKey = "version" -private static let opmlHMTLURLKey = "htmlUrl" -private static let opmlXMLURLKey = "xmlUrl" +private let opmlTextKey = "text" +private let opmlTitleKey = "title" +private let opmlDescriptionKey = "description" +private let opmlTypeKey = "type" +private let opmlVersionKey = "version" +private let opmlHMTLURLKey = "htmlUrl" +private let opmlXMLURLKey = "xmlUrl" // A frequent error in OPML files is to mess up the capitalization, // so these do a case-insensitive lookup. 
diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift index 647ff215c..55adb982c 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift @@ -9,6 +9,11 @@ import Foundation final class OPMLDocument: OPMLItem, @unchecked Sendable { - var title: String? - var url: String? + var title: String? = nil + var url: String? = nil + + init(url: String?) { + self.url = url + super.init(attributes: nil) + } } diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift index fba59b526..001ac169a 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift @@ -10,7 +10,7 @@ import os class OPMLItem: @unchecked Sendable { - public let feedSpecifier: OPMLFeedSpecifier + public let feedSpecifier: ParsedOPMLFeedSpecifier public let attributes: [String: String] public let titleFromAttributes: String? @@ -20,23 +20,13 @@ class OPMLItem: @unchecked Sendable { items.count > 0 } - init?(attributes: [String : String]) { - - guard let feedURL = attributes.opml_xmlUrl, !feedURL.isEmpty else { - return nil - } - - let titleFromAttributes = { - if let title = attributes.opml_title { - return title - } - return attributes.opml_text - }() - self.titleFromAttributes = titleFromAttributes - - self.feedSpecifier = OPMLFeedSpecifier(title: titleFromAttributes, feedDescription: attributes.opml_description, homePageURL: attributes.opml_htmlUrl, feedURL: feedURL) + init(attributes: [String : String]?) { + self.titleFromAttributes = attributes.opml_title ?? 
attributes.opml_text self.attributes = attributes + + self.feedSpecifier = ParsedOPMLFeedSpecifier(title: self.titleFromAttributes, feedDescription: attributes.opml_description, homePageURL: attributes.opml_htmlUrl, feedURL: attributes.opml_xmlUrl) + } func addItem(_ item: OPMLItem) { diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift index ad34ff31b..28472fee7 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift @@ -12,6 +12,7 @@ public final class OPMLParser { let url: String let data: Data + private let opmlDocument: OPMLDocument private var itemStack = [OPMLItem]() enum OPMLParserError: Error { @@ -22,9 +23,10 @@ public final class OPMLParser { self.url = parserData.url self.data = parserData.data + self.opmlDocument = OPMLDocument(url: parserData.url) } - func parse() throws -> OPMLDocument? { + func parse() throws -> ParsedOPMLDocument { guard canParseData() else { throw OPMLParserError.notOPML From 213f67d1de2d0033664b1dae8c24538233d65b2c Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 23 Aug 2024 20:31:13 -0700 Subject: [PATCH 12/88] Continue progress on porting OPML code to Swift. 
--- .../xcshareddata/xcschemes/Parser.xcscheme | 12 +++++++ .../Sources/Parser/OPML/OPMLDocument.swift | 2 +- ...pecifier.swift => OPMLFeedSpecifier.swift} | 4 +-- .../Parser/Sources/Parser/OPML/OPMLItem.swift | 2 +- .../Parser/OPML/ParsedOPMLDocument.swift | 25 --------------- .../Sources/Parser/OPML/ParsedOPMLItem.swift | 31 ------------------- 6 files changed, 16 insertions(+), 60 deletions(-) rename Modules/Parser/Sources/Parser/OPML/{ParsedOPMLFeedSpecifier.swift => OPMLFeedSpecifier.swift} (88%) delete mode 100644 Modules/Parser/Sources/Parser/OPML/ParsedOPMLDocument.swift delete mode 100644 Modules/Parser/Sources/Parser/OPML/ParsedOPMLItem.swift diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme index ed73f5e1b..7bfc0fe97 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme @@ -29,6 +29,18 @@ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" shouldUseLaunchSchemeArgsEnv = "YES" shouldAutocreateTestPlan = "YES"> + + + + + + 0 - } -} From c1db72c09fdb64f3786a8cf9fadcf6df4b5875b6 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 25 Aug 2024 21:59:08 -0700 Subject: [PATCH 13/88] Create Data extension methods for searching for a string. --- .../Parser/Utilities/Data+Parser.swift | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 Modules/Parser/Sources/Parser/Utilities/Data+Parser.swift diff --git a/Modules/Parser/Sources/Parser/Utilities/Data+Parser.swift b/Modules/Parser/Sources/Parser/Utilities/Data+Parser.swift new file mode 100644 index 000000000..052ca6d9c --- /dev/null +++ b/Modules/Parser/Sources/Parser/Utilities/Data+Parser.swift @@ -0,0 +1,68 @@ +// +// Data+Parser.swift +// +// +// Created by Brent Simmons on 8/24/24. 
+// + +import Foundation + +extension Data { + + /// Return true if the data contains a given String. + /// + /// Assumes that the data is UTF-8 or similar encoding — + /// if it’s UTF-16 or UTF-32, for instance, this will always return false. + /// Luckily these are rare. + /// + /// The String to search for should be something that could be encoded + /// in ASCII — like " Bool { + + contains(searchFor.utf8) + } + + /// Return true if searchFor appears in self. + func contains(_ searchFor: Data) -> Bool { + + let searchForCount = searchFor.count + let dataCount = self.count + + guard searchForCount > 0, searchForCount <= dataCount else { + return false + } + + let searchForInitialByte = searchFor[0] + var found = false + + self.withUnsafeBytes { bytes in + + let buffer = bytes.bindMemory(to: UInt8.self) + + for i in 0...dataCount - searchForCount { + + if buffer[i] == searchForInitialByte { + + var match = true + + for j in 1.. Date: Sun, 25 Aug 2024 22:00:02 -0700 Subject: [PATCH 14/88] =?UTF-8?q?Drop=20the=20interned=20string=20stuff=20?= =?UTF-8?q?=E2=80=94=C2=A0probably=20not=20worth=20the=20effort.=20Could?= =?UTF-8?q?=20always=20re-add=20later=20if=20needed.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Modules/Parser/Sources/Parser/SAXParser.swift | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/Modules/Parser/Sources/Parser/SAXParser.swift b/Modules/Parser/Sources/Parser/SAXParser.swift index ce9bbf885..c614e063f 100644 --- a/Modules/Parser/Sources/Parser/SAXParser.swift +++ b/Modules/Parser/Sources/Parser/SAXParser.swift @@ -17,10 +17,6 @@ protocol SAXParserDelegate { func saxParser(_: SAXParser, xmlEndElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) - - func saxParser(_: SAXParser, internedStringForName: XMLPointer, prefix: XMLPointer?) -> String? 
- - func saxParser(_: SAXParser, internedStringForValue: XMLPointer, count: Int) -> String? } final class SAXParser { @@ -111,24 +107,17 @@ final class SAXParser { continue } let prefix = attributes[j + 1] - var attributeName = delegate.saxParser(self, internedStringForName: attribute, prefix: prefix) - if attributeName == nil { - attributeName = String(cString: attribute) - if let prefix { - let attributePrefix = String(cString: prefix) - attributeName = "\(attributePrefix):\(attributeName!)" - } + var attributeName = String(cString: attribute) + if let prefix { + let attributePrefix = String(cString: prefix) + attributeName = "\(attributePrefix):\(attributeName!)" } guard let valueStart = attributes[j + 3], let valueEnd = attributes[j + 4] else { continue } let valueCount = valueEnd - valueStart - - var value = delegate.saxParser(self, internedStringForValue: valueStart, count: Int(valueCount)) - if value == nil { - value = String(bytes: UnsafeRawBufferPointer(start: valueStart, count: Int(valueCount)), encoding: .utf8) - } + var value = String(bytes: UnsafeRawBufferPointer(start: valueStart, count: Int(valueCount)), encoding: .utf8) if let value, let attributeName { dictionary[attributeName] = value @@ -140,6 +129,12 @@ final class SAXParser { return dictionary } + + func stringNoCopy(_ bytes: XMLPointer) -> String { + + let length = strlen(bytes) + return NSString(bytesNoCopy: bytes, length: length, encoding: .utf8, freeWhenDone: false) as String + } } private extension SAXParser { From 0f8c529d0ddb1c3f768ee2c1d05beb691bdd126e Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 25 Aug 2024 22:00:27 -0700 Subject: [PATCH 15/88] Continue progress on porting OPML code to Swift. 
--- .../Sources/Parser/Feeds/ParsedAuthor.swift | 4 +- .../Parser/Sources/Parser/HTML/HTMLTag.swift | 4 +- .../Sources/Parser/OPML/OPMLDocument.swift | 6 +- .../Parser/Sources/Parser/OPML/OPMLItem.swift | 4 +- .../Sources/Parser/OPML/OPMLParser.swift | 59 +++++++++++++------ 5 files changed, 50 insertions(+), 27 deletions(-) diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift b/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift index 3b97cba59..ded26d415 100644 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift +++ b/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift @@ -23,11 +23,11 @@ public struct ParsedAuthor: Hashable, Codable, Sendable { } /// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.) - convenience init(singleString: String) { + init(singleString: String) { if singleString.contains("@") { init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString) - } else if singleString.lowercased.hasPrefix("http") { + } else if singleString.lowercased().hasPrefix("http") { init(name: nil, url: singleString, avatarURL: nil, emailAddress: nil) } else { init(name: singleString, url: nil, avatarURL: nil, emailAddress: nil) diff --git a/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift b/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift index 27acc83aa..1ef7f9ad6 100644 --- a/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift +++ b/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift @@ -9,12 +9,12 @@ import Foundation public struct HTMLTag: Sendable { - public enum HTMLTagType { + public enum TagType { case link case meta } - public let tagType: HTMLTagType + public let tagType: TagType public let attributes: [String: String]? public init(tagType: TagType, attributes: [String : String]?) 
{ diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift index 690d23155..020ad11d3 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift @@ -7,10 +7,10 @@ import Foundation -final class OPMLDocument: OPMLItem { +public final class OPMLDocument: OPMLItem { - var title: String? = nil - var url: String? = nil + public var title: String? = nil + public var url: String? = nil init(url: String?) { self.url = url diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift index 7d1733fbb..35b2c3eac 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift @@ -8,9 +8,9 @@ import Foundation import os -class OPMLItem { +public class OPMLItem { - public let feedSpecifier: ParsedOPMLFeedSpecifier + public let feedSpecifier: OPMLFeedSpecifier public let attributes: [String: String] public let titleFromAttributes: String? diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift index 28472fee7..59b972158 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift @@ -9,32 +9,63 @@ import Foundation public final class OPMLParser { - let url: String - let data: Data + private let url: String + private let data: Data private let opmlDocument: OPMLDocument + private var itemStack = [OPMLItem]() - - enum OPMLParserError: Error { - case notOPML + private var currentItem: OPMLItem? { + itemStack.last } - init(parserData: ParserData) { + /// Returns nil if data can’t be parsed (if it’s not OPML). + public static func document(with parserData: ParserData) -> OPMLDocument? 
{ + + let opmlParser = OPMLParser(parserData) + return opmlParser.parse() + } + + init(_ parserData: ParserData) { self.url = parserData.url self.data = parserData.data self.opmlDocument = OPMLDocument(url: parserData.url) } +} - func parse() throws -> ParsedOPMLDocument { +private extension OPMLParser { + + func parse() -> OPMLDocument? { guard canParseData() else { - throw OPMLParserError.notOPML + return nil } - let parser = SAXParser(delegate: self, data: data) - + pushItem(opmlDocument) + let saxParser = SAXParser(delegate: self, data: data) + saxParser.parse() + } + + func canParseData() -> Bool { + + data.containsASCIIString(" 0) + guard itemStack.count > 0 else { + assertionFailure("itemStack.count must be > 0") + } + + itemStack.dropLast() } } @@ -51,12 +82,4 @@ extension OPMLParser: SAXParserDelegate { func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { } - - func saxParser(_: SAXParser, internedStringForName: XMLPointer, prefix: XMLPointer?) -> String? { - - } - - func saxParser(_: SAXParser, internedStringForValue: XMLPointer, count: Int) -> String? { - - } } From c261aff21fd10d85ffe6dd40c993f0ba1e3cdb6d Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 19:27:30 -0700 Subject: [PATCH 16/88] Fix some build errors. 
--- .../Parser/Sources/Parser/Feeds/ParsedAuthor.swift | 6 +++--- Modules/Parser/Sources/Parser/OPML/OPMLItem.swift | 12 +++++++----- .../Sources/Parser/RSHTMLMetadata+Parser.swift | 10 ---------- 3 files changed, 10 insertions(+), 18 deletions(-) delete mode 100644 Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift b/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift index ded26d415..01e91e2ad 100644 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift +++ b/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift @@ -26,11 +26,11 @@ public struct ParsedAuthor: Hashable, Codable, Sendable { init(singleString: String) { if singleString.contains("@") { - init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString) + self.init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString) } else if singleString.lowercased().hasPrefix("http") { - init(name: nil, url: singleString, avatarURL: nil, emailAddress: nil) + self.init(name: nil, url: singleString, avatarURL: nil, emailAddress: nil) } else { - init(name: singleString, url: nil, avatarURL: nil, emailAddress: nil) + self.init(name: singleString, url: nil, avatarURL: nil, emailAddress: nil) } } diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift index 35b2c3eac..9a1e0b23e 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift @@ -10,22 +10,24 @@ import os public class OPMLItem { - public let feedSpecifier: OPMLFeedSpecifier + public let feedSpecifier: OPMLFeedSpecifier? - public let attributes: [String: String] + public let attributes: [String: String]? public let titleFromAttributes: String? public var items: [OPMLItem]? public var isFolder: Bool { - items.count > 0 + (items?.count ?? 0) > 0 } init(attributes: [String : String]?) { - self.titleFromAttributes = attributes.opml_title ?? 
attributes.opml_text + self.titleFromAttributes = attributes?.opml_title ?? attributes?.opml_text self.attributes = attributes - self.feedSpecifier = ParsedOPMLFeedSpecifier(title: self.titleFromAttributes, feedDescription: attributes.opml_description, homePageURL: attributes.opml_htmlUrl, feedURL: attributes.opml_xmlUrl) + if let feedURL = attributes?.opml_xmlUrl { + self.feedSpecifier = OPMLFeedSpecifier(title: self.titleFromAttributes, feedDescription: attributes?.opml_description, homePageURL: attributes?.opml_htmlUrl, feedURL: feedURL) + } } diff --git a/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift b/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift deleted file mode 100644 index 391380b22..000000000 --- a/Modules/Parser/Sources/Parser/RSHTMLMetadata+Parser.swift +++ /dev/null @@ -1,10 +0,0 @@ -// -// File.swift -// -// -// Created by Brent Simmons on 4/7/24. -// - -import Foundation - -extension RSHTMLMetadataParser: @unchecked Sendable {} From e29a232b779928de3474050e873edc204c7af441 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 20:03:35 -0700 Subject: [PATCH 17/88] Create SAXUtilities. --- .../Parser/Sources/Parser/SAXUtilities.swift | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 Modules/Parser/Sources/Parser/SAXUtilities.swift diff --git a/Modules/Parser/Sources/Parser/SAXUtilities.swift b/Modules/Parser/Sources/Parser/SAXUtilities.swift new file mode 100644 index 000000000..81871f08e --- /dev/null +++ b/Modules/Parser/Sources/Parser/SAXUtilities.swift @@ -0,0 +1,18 @@ +// +// File.swift +// +// +// Created by Brent Simmons on 8/26/24. +// + +import Foundation +import libxml2 + +func SAXEqualStrings(_ s1: XMLPointer, _ s2: XMLPointer, length: Int? 
= nil) -> Bool { + + if length == nil { + return Bool(xmlStrEqual(s1, s2)) + } + + return xmlStrncmp(s1, s2, length) == 0 +} From e7a82376b0fa32dae2b0b706c46176101a4b725a Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 20:03:58 -0700 Subject: [PATCH 18/88] Finish first draft of OPML parser. --- .../Parser/Sources/Parser/OPML/OPMLItem.swift | 2 +- .../Sources/Parser/OPML/OPMLParser.swift | 38 +++++++++++++++++-- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift index 9a1e0b23e..a00a4ce35 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift @@ -31,7 +31,7 @@ public class OPMLItem { } - func addItem(_ item: OPMLItem) { + func add(_ item: OPMLItem) { if items == nil { items = [OPMLItem]() diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift index 59b972158..59ae75b7b 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift @@ -19,6 +19,11 @@ public final class OPMLParser { itemStack.last } + struct XMLKey { + static let title = "title".utf8CString + static let outline = "outline".utf8CString + } + /// Returns nil if data can’t be parsed (if it’s not OPML). public static func document(with parserData: ParserData) -> OPMLDocument? 
{ @@ -53,16 +58,16 @@ private extension OPMLParser { data.containsASCIIString(" 0) guard itemStack.count > 0 else { assertionFailure("itemStack.count must be > 0") + return } itemStack.dropLast() @@ -71,15 +76,40 @@ private extension OPMLParser { extension OPMLParser: SAXParserDelegate { - func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer?) { + func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer?) { + if SAXEqualStrings(localName, XMLKey.title) { + saxParser.beginStoringCharacters() + return + } + + if !SAXEqualStrings(localName, XMLKey.outline) { + return + } + + let attributesDictionary = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) + let item = OPMLItem(attributes: attributesDictionary) + + currentItem?.add(item) + push(item) } - func saxParser(_: SAXParser, xmlEndElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + if SAXEqualStrings(localname, XMLKey.title) { + if let item = currentItem as? OPMLDocument { + item.title = saxParser.currentStringWithTrimmedWhitespace + } + return + } + + if SAXEqualStrings(localName, XMLKey.outline) { + popItem() + } } func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { + // Nothing to do, but method is required. } } From d13014787a5fbfe37af6419243800c19f0ba6d39 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 20:53:57 -0700 Subject: [PATCH 19/88] Create separate SAX target. 
--- Modules/Parser/Package.swift | 12 + .../Sources/Parser/OPML/OPMLParser.swift | 2 +- .../Sources/{Parser => SAX}/ParserData.swift | 0 .../Parser/Sources/SAX/SAXHTMLParser.swift | 54 +++ .../Sources/{Parser => SAX}/SAXParser.swift | 22 +- .../{Parser => SAX}/SAXUtilities.swift | 6 +- .../Sources/ParserObjC/RSOPMLParser.h | 26 -- .../Sources/ParserObjC/RSOPMLParser.m | 310 --------------- .../Sources/ParserObjC/RSSAXParser.h | 69 ---- .../Sources/ParserObjC/RSSAXParser.m | 353 ------------------ 10 files changed, 78 insertions(+), 776 deletions(-) rename Modules/Parser/Sources/{Parser => SAX}/ParserData.swift (100%) create mode 100644 Modules/Parser/Sources/SAX/SAXHTMLParser.swift rename Modules/Parser/Sources/{Parser => SAX}/SAXParser.swift (83%) rename Modules/Parser/Sources/{Parser => SAX}/SAXUtilities.swift (67%) delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.m diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index 6d7320717..3a68833dc 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -12,6 +12,10 @@ let package = Package( name: "Parser", type: .dynamic, targets: ["Parser"]), + .library( + name: "SAX", + type: .dynamic, + targets: ["SAX"]) ], dependencies: [ ], @@ -20,6 +24,14 @@ let package = Package( // Targets can depend on other targets in this package, and on products in packages this package depends on. 
.target( name: "Parser", + dependencies: [ + "SAX" + ], + swiftSettings: [ + .enableExperimentalFeature("StrictConcurrency") + ]), + .target( + name: "SAX", dependencies: [], swiftSettings: [ .enableExperimentalFeature("StrictConcurrency") diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift index 59ae75b7b..0ecda56e9 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift +++ b/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift @@ -76,7 +76,7 @@ private extension OPMLParser { extension OPMLParser: SAXParserDelegate { - func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer?) { + func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { if SAXEqualStrings(localName, XMLKey.title) { saxParser.beginStoringCharacters() diff --git a/Modules/Parser/Sources/Parser/ParserData.swift b/Modules/Parser/Sources/SAX/ParserData.swift similarity index 100% rename from Modules/Parser/Sources/Parser/ParserData.swift rename to Modules/Parser/Sources/SAX/ParserData.swift diff --git a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift new file mode 100644 index 000000000..43ba1cc38 --- /dev/null +++ b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift @@ -0,0 +1,54 @@ +//// +//// SAXHTMLParser.swift +//// +//// +//// Created by Brent Simmons on 8/26/24. +//// +// +//import Foundation +//import libxml2 +// +//protocol SAXHTMLParserDelegate: AnyObject { +// +// func saxParser(_: SAXHTMLParser, XMLStartElement localName: XMLPointer, attributes: UnsafePointer?) 
+// +// func saxParser(_: SAXHTMLParser, XMLEndElement localName: XMLPointer?) +// +// // Length is guaranteed to be greater than 0. +// func saxParser(_: SAXHTMLParser, XMLCharactersFound characters: XMLPointer?, length: Int) +//} +// +//final class SAXHTMLParser { +// +// fileprivate let delegate: SAXHTMLParserDelegate +// private var data: Data +// +// init(delegate: SAXHTMLParserDelegate, data: Data) { +// +// self.delegate = delegate +// self.data = data +// } +// +// func parse() { +// +// guard !data.isEmpty else { +// return +// } +// +// data.withUnsafeBytes { bufferPointer in +// +// guard let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress else { +// return +// } +// +// let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count)) +// let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding) +// htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT)) +// +// htmlParseChunk(context, bytes, Int32(data.count), 0) +// +// htmlParseChunk(context, nil, 0, 1) +// htmlFreeParserCtxt(context) +// } +// } +//} diff --git a/Modules/Parser/Sources/Parser/SAXParser.swift b/Modules/Parser/Sources/SAX/SAXParser.swift similarity index 83% rename from Modules/Parser/Sources/Parser/SAXParser.swift rename to Modules/Parser/Sources/SAX/SAXParser.swift index c614e063f..160395f8d 100644 --- a/Modules/Parser/Sources/Parser/SAXParser.swift +++ b/Modules/Parser/Sources/SAX/SAXParser.swift @@ -12,7 +12,7 @@ typealias XMLPointer = UnsafePointer protocol SAXParserDelegate { - func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer?) 
+ func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) func saxParser(_: SAXParser, xmlEndElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) @@ -69,8 +69,8 @@ final class SAXParser { xmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NOENT.rawValue)) data.withUnsafeBytes { bufferPointer in - if let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress { - xmlParseChunk(context, bytes, CInt(data.count), 0) + if let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress { + xmlParseChunk(context, bytes, Int32(data.count), 0) } } @@ -110,16 +110,16 @@ final class SAXParser { var attributeName = String(cString: attribute) if let prefix { let attributePrefix = String(cString: prefix) - attributeName = "\(attributePrefix):\(attributeName!)" + attributeName = "\(attributePrefix):\(attributeName)" } guard let valueStart = attributes[j + 3], let valueEnd = attributes[j + 4] else { continue } let valueCount = valueEnd - valueStart - var value = String(bytes: UnsafeRawBufferPointer(start: valueStart, count: Int(valueCount)), encoding: .utf8) + let value = String(bytes: UnsafeRawBufferPointer(start: valueStart, count: Int(valueCount)), encoding: .utf8) - if let value, let attributeName { + if let value { dictionary[attributeName] = value } @@ -129,12 +129,6 @@ final class SAXParser { return dictionary } - - func stringNoCopy(_ bytes: XMLPointer) -> String { - - let length = strlen(bytes) - return NSString(bytesNoCopy: bytes, length: length, encoding: .utf8, freeWhenDone: false) as String - } } private extension SAXParser { @@ -148,7 +142,7 @@ private extension SAXParser { delegate.saxParser(self, xmlCharactersFound: xmlCharacters, count: count) } - func startElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: 
UnsafeMutablePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer?) { + func startElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { delegate.saxParser(self, xmlStartElement: name, prefix: prefix, uri: uri, namespaceCount: namespaceCount, namespaces: namespaces, attributeCount: attributeCount, attributesDefaultedCount: attributesDefaultedCount, attributes: attributes) } @@ -160,7 +154,7 @@ private extension SAXParser { } } -private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer?) { +private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafePointer?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer?) { guard let context, let name else { return diff --git a/Modules/Parser/Sources/Parser/SAXUtilities.swift b/Modules/Parser/Sources/SAX/SAXUtilities.swift similarity index 67% rename from Modules/Parser/Sources/Parser/SAXUtilities.swift rename to Modules/Parser/Sources/SAX/SAXUtilities.swift index 81871f08e..bbc2b9352 100644 --- a/Modules/Parser/Sources/Parser/SAXUtilities.swift +++ b/Modules/Parser/Sources/SAX/SAXUtilities.swift @@ -10,9 +10,9 @@ import libxml2 func SAXEqualStrings(_ s1: XMLPointer, _ s2: XMLPointer, length: Int? 
= nil) -> Bool { - if length == nil { - return Bool(xmlStrEqual(s1, s2)) + if let length { + return xmlStrncmp(s1, s2, Int32(length)) == 0 } - return xmlStrncmp(s1, s2, length) == 0 + return xmlStrEqual(s1, s2) != 0 } diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.h deleted file mode 100755 index 8db594b03..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.h +++ /dev/null @@ -1,26 +0,0 @@ -// -// RSOPMLParser.h -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@class ParserData; -@class RSOPMLDocument; - -typedef void (^OPMLParserCallback)(RSOPMLDocument *opmlDocument, NSError *error); - -// Parses on background thread; calls back on main thread. -void RSParseOPML(ParserData *parserData, OPMLParserCallback callback); - - -@interface RSOPMLParser: NSObject - -+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error; - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.m deleted file mode 100755 index 93f2c420c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSOPMLParser.m +++ /dev/null @@ -1,310 +0,0 @@ -// -// RSOPMLParser.m -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSOPMLParser.h" -#import "RSSAXParser.h" -#import "RSOPMLItem.h" -#import "RSOPMLDocument.h" -#import "RSOPMLAttributes.h" -#import "RSOPMLError.h" -#import "RSOPMLParser.h" -#import "ParserData.h" - -#import - - - -@interface RSOPMLParser () - -@property (nonatomic, readwrite) RSOPMLDocument *OPMLDocument; -@property (nonatomic, readwrite) NSError *error; -@property (nonatomic) NSMutableArray *itemStack; - -@end - -void RSParseOPML(ParserData *parserData, OPMLParserCallback callback) { - - dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - - @autoreleasepool { - NSError *error = nil; - RSOPMLDocument *opmlDocument = [RSOPMLParser parseOPMLWithParserData:parserData error:&error]; - - dispatch_async(dispatch_get_main_queue(), ^{ - callback(opmlDocument, error); - }); - } - }); -} - -@implementation RSOPMLParser - -#pragma mark - Class Methods - -+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error { - - RSOPMLParser *parser = [[RSOPMLParser alloc] initWithParserData:parserData]; - - RSOPMLDocument *document = parser.OPMLDocument; - document.url = parserData.url; - if (parser.error && error) { - *error = parser.error; - return nil; - } - return document; -} - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - [self parse:parserData]; - - return self; -} - - -#pragma mark - Private - -- (void)parse:(ParserData *)parserData { - - @autoreleasepool { - - if (![self canParseData:parserData.data]) { - - NSString *filename = nil; - NSURL *url = [NSURL URLWithString:parserData.url]; - if (url && url.isFileURL) { - filename = url.path.lastPathComponent; - } - if ([parserData.url hasPrefix:@"http"]) { - filename = parserData.url; - } - if (!filename) { - filename = parserData.url; - } - self.error = RSOPMLWrongFormatError(filename); - return; - } - - RSSAXParser *parser = [[RSSAXParser alloc] 
initWithDelegate:self]; - - self.itemStack = [NSMutableArray new]; - self.OPMLDocument = [RSOPMLDocument new]; - [self pushItem:self.OPMLDocument]; - - [parser parseData:parserData.data]; - [parser finishParsing]; - } -} - -- (BOOL)canParseData:(NSData *)d { - - // Check for 0, nil); - - /*If itemStack is empty, bad things are happening. - But we still shouldn't crash in production.*/ - - if (self.itemStack.count > 0) { - [self.itemStack removeLastObject]; - } -} - - -- (RSOPMLItem *)currentItem { - - return self.itemStack.lastObject; -} - - -#pragma mark - RSSAXParserDelegate - -static const char *kOutline = "outline"; -static const char kOutlineLength = 8; - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - [SAXParser beginStoringCharacters]; - return; - } - - if (!RSSAXEqualTags(localName, kOutline, kOutlineLength)) { - return; - } - - RSOPMLItem *item = [RSOPMLItem new]; - item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - - [[self currentItem] addChild:item]; - [self pushItem:item]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - RSOPMLItem* item = [self currentItem]; - if ([item isKindOfClass:[RSOPMLDocument class]]) { - ((RSOPMLDocument *)item).title = SAXParser.currentStringWithTrimmedWhitespace; - } - return; - } - - if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) { - [self popItem]; - } -} - - -static const char *kText = "text"; -static const NSInteger kTextLength = 5; - 
-static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kDescription = "description"; -static const NSInteger kDescriptionLength = 12; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kVersion = "version"; -static const NSInteger kVersionLength = 8; - -static const char *kHTMLURL = "htmlUrl"; -static const NSInteger kHTMLURLLength = 8; - -static const char *kXMLURL = "xmlUrl"; -static const NSInteger kXMLURLLength = 7; - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (prefix) { - return nil; - } - - size_t nameLength = strlen((const char *)name); - - if (nameLength == kTextLength - 1) { - if (RSSAXEqualTags(name, kText, kTextLength)) { - return OPMLTextKey; - } - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return OPMLTypeKey; - } - } - - else if (nameLength == kTitleLength - 1) { - if (RSSAXEqualTags(name, kTitle, kTitleLength)) { - return OPMLTitleKey; - } - } - - else if (nameLength == kXMLURLLength - 1) { - if (RSSAXEqualTags(name, kXMLURL, kXMLURLLength)) { - return OPMLXMLURLKey; - } - } - - else if (nameLength == kVersionLength - 1) { - if (RSSAXEqualTags(name, kVersion, kVersionLength)) { - return OPMLVersionKey; - } - if (RSSAXEqualTags(name, kHTMLURL, kHTMLURLLength)) { - return OPMLHMTLURLKey; - } - } - - else if (nameLength == kDescriptionLength - 1) { - if (RSSAXEqualTags(name, kDescription, kDescriptionLength)) { - return OPMLDescriptionKey; - } - } - - return nil; -} - - -static const char *kRSSUppercase = "RSS"; -static const char *kRSSLowercase = "rss"; -static const NSUInteger kRSSLength = 3; -static NSString *RSSUppercaseValue = @"RSS"; -static NSString *RSSLowercaseValue = @"rss"; -static NSString *emptyString = @""; - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - -- 
(NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - - - if (length < 1) { - return emptyString; - } - - if (length == kRSSLength) { - - if (equalBytes(bytes, kRSSUppercase, kRSSLength)) { - return RSSUppercaseValue; - } - else if (equalBytes(bytes, kRSSLowercase, kRSSLength)) { - return RSSLowercaseValue; - } - - } - - return nil; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.h deleted file mode 100755 index 80ca30a75..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.h +++ /dev/null @@ -1,69 +0,0 @@ -// -// RSSAXParser.h -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -/*Thread-safe, not re-entrant. - - Calls to the delegate will happen on the same thread where the parser runs. - - This is a low-level streaming XML parser, a thin wrapper for libxml2's SAX parser. It doesn't do much Foundation-ifying quite on purpose -- because the goal is performance and low memory use. - - This class is not meant to be sub-classed. Use the delegate methods. - */ - - -@class RSSAXParser; - -@protocol RSSAXParserDelegate - -@optional - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes; - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri; - -// Length is guaranteed to be greater than 0. 
-- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length; - -- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/ - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. Prefix may be nil.*/ - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length; - -@end - - -void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser. - -/*For use by delegate.*/ - -BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength); - - -@interface RSSAXParser : NSObject - -- (instancetype)initWithDelegate:(id)delegate; - -- (void)parseData:(NSData *)data; -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; -- (void)finishParsing; -- (void)cancel; - -@property (nonatomic, strong, readonly) NSData *currentCharacters; /*nil if not storing characters. UTF-8 encoded.*/ -@property (nonatomic, strong, readonly) NSString *currentString; /*Convenience to get string version of currentCharacters.*/ -@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - -- (void)beginStoringCharacters; /*Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.*/ - -/*Delegate can call from within XMLStartElement. 
Returns nil if numberOfAttributes < 1.*/ - -- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.m deleted file mode 100755 index 02d6988b8..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXParser.m +++ /dev/null @@ -1,353 +0,0 @@ -// -// RSSAXParser.m -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - -#import "RSSAXParser.h" -#import "RSParserInternal.h" - -#import -#import -#import - - - -@interface RSSAXParser () - -@property (nonatomic, weak) id delegate; -@property (nonatomic, assign) xmlParserCtxtPtr context; -@property (nonatomic, assign) BOOL storingCharacters; -@property (nonatomic) NSMutableData *characters; -@property (nonatomic) BOOL delegateRespondsToInternedStringMethod; -@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod; -@property (nonatomic) BOOL delegateRespondsToStartElementMethod; -@property (nonatomic) BOOL delegateRespondsToEndElementMethod; -@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; -@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; - -@end - - -@implementation RSSAXParser - -+ (void)initialize { - - RSSAXInitLibXMLParser(); -} - - -#pragma mark - Init - -- (instancetype)initWithDelegate:(id)delegate { - - self = [super init]; - if (self == nil) - return nil; - - _delegate = delegate; - - if ([_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)]) { - _delegateRespondsToInternedStringMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)]) { - _delegateRespondsToInternedStringForValueMethod = YES; - } - if ([_delegate 
respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)]) { - _delegateRespondsToStartElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)]) { - _delegateRespondsToEndElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { - _delegateRespondsToCharactersFoundMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { - _delegateRespondsToEndOfDocumentMethod = YES; - } - - return self; -} - - -#pragma mark - Dealloc - -- (void)dealloc { - if (_context != nil) { - xmlFreeParserCtxt(_context); - _context = nil; - } - _delegate = nil; -} - - -#pragma mark - API - -static xmlSAXHandler saxHandlerStruct; - -- (void)parseData:(NSData *)data { - - [self parseBytes:data.bytes numberOfBytes:data.length]; -} - - -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { - - if (self.context == nil) { - - self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil); - xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT); - } - - @autoreleasepool { - xmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); - } -} - - -- (void)finishParsing { - - NSAssert(self.context != nil, nil); - if (self.context == nil) - return; - - @autoreleasepool { - xmlParseChunk(self.context, nil, 0, 1); - xmlFreeParserCtxt(self.context); - self.context = nil; - self.characters = nil; - } -} - - -- (void)cancel { - - @autoreleasepool { - xmlStopParser(self.context); - } -} - - -- (void)beginStoringCharacters { - self.storingCharacters = YES; - self.characters = [NSMutableData new]; -} - - -- (void)endStoringCharacters { - self.storingCharacters = NO; - self.characters = nil; -} - - -- (NSData *)currentCharacters { - - if (!self.storingCharacters) { - return nil; - } - - return 
self.characters; -} - - -- (NSString *)currentString { - - NSData *d = self.currentCharacters; - if (RSParserObjectIsEmpty(d)) { - return nil; - } - - return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; -} - - -- (NSString *)currentStringWithTrimmedWhitespace { - - return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; -} - - -#pragma mark - Attributes Dictionary - -- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes { - - if (numberOfAttributes < 1 || !attributes) { - return nil; - } - - NSMutableDictionary *d = [NSMutableDictionary new]; - - @autoreleasepool { - NSInteger i = 0, j = 0; - for (i = 0, j = 0; i < numberOfAttributes; i++, j+=5) { - - NSUInteger lenValue = (NSUInteger)(attributes[j + 4] - attributes[j + 3]); - NSString *value = nil; - - if (self.delegateRespondsToInternedStringForValueMethod) { - value = [self.delegate saxParser:self internedStringForValue:(const void *)attributes[j + 3] length:lenValue]; - } - if (!value) { - value = [[NSString alloc] initWithBytes:(const void *)attributes[j + 3] length:lenValue encoding:NSUTF8StringEncoding]; - } - - NSString *attributeName = nil; - - if (self.delegateRespondsToInternedStringMethod) { - attributeName = [self.delegate saxParser:self internedStringForName:(const xmlChar *)attributes[j] prefix:(const xmlChar *)attributes[j + 1]]; - } - - if (!attributeName) { - attributeName = [NSString stringWithUTF8String:(const char *)attributes[j]]; - if (attributes[j + 1]) { - NSString *attributePrefix = [NSString stringWithUTF8String:(const char *)attributes[j + 1]]; - attributeName = [NSString stringWithFormat:@"%@:%@", attributePrefix, attributeName]; - } - } - - if (value && attributeName) { - d[attributeName] = value; - } - } - } - - return d; -} - - -#pragma mark - Equal Tags - -BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) { - - if 
(!localName) { - return NO; - } - return !strncmp((const char *)localName, tag, (size_t)tagLength); -} - - -#pragma mark - Callbacks - -- (void)xmlEndDocument { - - @autoreleasepool { - if (self.delegateRespondsToEndOfDocumentMethod) { - [self.delegate saxParserDidReachEndOfDocument:self]; - } - - [self endStoringCharacters]; - } -} - - -- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length { - - if (length < 1) { - return; - } - - @autoreleasepool { - if (self.storingCharacters) { - [self.characters appendBytes:(const void *)ch length:length]; - } - - if (self.delegateRespondsToCharactersFoundMethod) { - [self.delegate saxParser:self XMLCharactersFound:ch length:length]; - } - } -} - - -- (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - @autoreleasepool { - if (self.delegateRespondsToStartElementMethod) { - - [self.delegate saxParser:self XMLStartElement:localName prefix:prefix uri:uri numberOfNamespaces:numberOfNamespaces namespaces:namespaces numberOfAttributes:numberOfAttributes numberDefaulted:numberDefaulted attributes:attributes]; - } - } -} - - -- (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - @autoreleasepool { - if (self.delegateRespondsToEndElementMethod) { - [self.delegate saxParser:self XMLEndElement:localName prefix:prefix uri:uri]; - } - - [self endStoringCharacters]; - } -} - - -@end - - -static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) { - - [(__bridge RSSAXParser *)context xmlStartElement:localname prefix:prefix uri:URI 
numberOfNamespaces:nb_namespaces namespaces:namespaces numberOfAttributes:nb_attributes numberDefaulted:nb_defaulted attributes:attributes]; -} - - -static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) { - [(__bridge RSSAXParser *)context xmlEndElement:localname prefix:prefix uri:URI]; -} - - -static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { - [(__bridge RSSAXParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; -} - - -static void endDocumentSAX(void *context) { - [(__bridge RSSAXParser *)context xmlEndDocument]; -} - - -static xmlSAXHandler saxHandlerStruct = { - nil, /* internalSubset */ - nil, /* isStandalone */ - nil, /* hasInternalSubset */ - nil, /* hasExternalSubset */ - nil, /* resolveEntity */ - nil, /* getEntity */ - nil, /* entityDecl */ - nil, /* notationDecl */ - nil, /* attributeDecl */ - nil, /* elementDecl */ - nil, /* unparsedEntityDecl */ - nil, /* setDocumentLocator */ - nil, /* startDocument */ - endDocumentSAX, /* endDocument */ - nil, /* startElement*/ - nil, /* endElement */ - nil, /* reference */ - charactersFoundSAX, /* characters */ - nil, /* ignorableWhitespace */ - nil, /* processingInstruction */ - nil, /* comment */ - nil, /* warning */ - nil, /* error */ - nil, /* fatalError //: unused error() get all the errors */ - nil, /* getParameterEntity */ - nil, /* cdataBlock */ - nil, /* externalSubset */ - XML_SAX2_MAGIC, - nil, - startElementSAX, /* startElementNs */ - endElementSAX, /* endElementNs */ - nil /* serror */ -}; - - -void RSSAXInitLibXMLParser(void) { - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - xmlInitParser(); - }); -} - From f63af89e3144e8736c5f8cc7e1dc8ccb14eba2ce Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 20:56:20 -0700 Subject: [PATCH 20/88] Move some extensions into SAX package. 
--- .../Utilities/Data+Parser.swift => SAX/Extensions/Data+SAX.swift} | 0 .../Extensions/Dictionary+SAX.swift} | 0 .../String+Parser.swift => SAX/Extensions/String+SAX.swift} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename Modules/Parser/Sources/{Parser/Utilities/Data+Parser.swift => SAX/Extensions/Data+SAX.swift} (100%) rename Modules/Parser/Sources/{Parser/Utilities/Dictionary+Parser.swift => SAX/Extensions/Dictionary+SAX.swift} (100%) rename Modules/Parser/Sources/{Parser/Utilities/String+Parser.swift => SAX/Extensions/String+SAX.swift} (100%) diff --git a/Modules/Parser/Sources/Parser/Utilities/Data+Parser.swift b/Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Utilities/Data+Parser.swift rename to Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift diff --git a/Modules/Parser/Sources/Parser/Utilities/Dictionary+Parser.swift b/Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Utilities/Dictionary+Parser.swift rename to Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift diff --git a/Modules/Parser/Sources/Parser/Utilities/String+Parser.swift b/Modules/Parser/Sources/SAX/Extensions/String+SAX.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Utilities/String+Parser.swift rename to Modules/Parser/Sources/SAX/Extensions/String+SAX.swift From 4349dd26ff8e7794e3a210a86fa31f40462d7980 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 22:39:46 -0700 Subject: [PATCH 21/88] Create OPMLParserTests. 
--- .../xcschemes/OPMLParser.xcscheme | 67 + .../xcschemes/OPMLParserTests.xcscheme | 54 + .../xcschemes/Parser-Package.xcscheme | 117 + .../xcode/xcshareddata/xcschemes/SAX.xcscheme | 67 + Modules/Parser/Package.swift | 19 +- .../OPML => OPMLParser}/OPMLAttributes.swift | 0 .../OPML => OPMLParser}/OPMLDocument.swift | 0 .../OPMLFeedSpecifier.swift | 1 + .../OPML => OPMLParser}/OPMLItem.swift | 3 +- .../OPML => OPMLParser}/OPMLParser.swift | 39 +- .../Sources/SAX/Extensions/Data+SAX.swift | 2 +- .../SAX/Extensions/Dictionary+SAX.swift | 2 +- .../Sources/SAX/Extensions/String+SAX.swift | 2 +- .../{Parser/HTML => SAX}/HTMLTag.swift | 2 +- Modules/Parser/Sources/SAX/ParserData.swift | 4 +- Modules/Parser/Sources/SAX/SAXParser.swift | 29 +- Modules/Parser/Sources/SAX/SAXUtilities.swift | 25 +- .../OPMLTests.swift | 43 +- .../Resources/DaringFireball.rss | 2278 +++++++++++++++++ .../Resources/Subs.opml | 0 .../Resources/SubsNoTitleAttributes.opml | 0 21 files changed, 2692 insertions(+), 62 deletions(-) create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme rename Modules/Parser/Sources/{Parser/OPML => OPMLParser}/OPMLAttributes.swift (100%) rename Modules/Parser/Sources/{Parser/OPML => OPMLParser}/OPMLDocument.swift (100%) rename Modules/Parser/Sources/{Parser/OPML => OPMLParser}/OPMLFeedSpecifier.swift (98%) rename Modules/Parser/Sources/{Parser/OPML => OPMLParser}/OPMLItem.swift (95%) rename Modules/Parser/Sources/{Parser/OPML => OPMLParser}/OPMLParser.swift (59%) rename Modules/Parser/Sources/{Parser/HTML => SAX}/HTMLTag.swift (91%) rename Modules/Parser/Tests/{ParserTests => OPMLParserTests}/OPMLTests.swift (52%) create mode 100755 
Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss rename Modules/Parser/Tests/{ParserTests => OPMLParserTests}/Resources/Subs.opml (100%) rename Modules/Parser/Tests/{ParserTests => OPMLParserTests}/Resources/SubsNoTitleAttributes.opml (100%) diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme new file mode 100644 index 000000000..218cca92a --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme new file mode 100644 index 000000000..5b3a92420 --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme new file mode 100644 index 000000000..1dcb9ee6e --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme @@ -0,0 +1,117 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme new file mode 100644 index 000000000..0267c802a --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index 3a68833dc..745b19884 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -15,13 +15,26 @@ let package = Package( .library( 
name: "SAX", type: .dynamic, - targets: ["SAX"]) + targets: ["SAX"]), + .library( + name: "OPMLParser", + type: .dynamic, + targets: ["OPMLParser"]) + ], dependencies: [ ], targets: [ // Targets are the basic building blocks of a package. A target can define a module or a test suite. // Targets can depend on other targets in this package, and on products in packages this package depends on. + .target( + name: "OPMLParser", + dependencies: [ + "SAX" + ], + swiftSettings: [ + .enableExperimentalFeature("StrictConcurrency") + ]), .target( name: "Parser", dependencies: [ @@ -41,6 +54,10 @@ let package = Package( dependencies: ["Parser"], exclude: ["Info.plist"], resources: [.copy("Resources")]), + .testTarget( + name: "OPMLParserTests", + dependencies: ["OPMLParser"], + resources: [.copy("Resources")]), ] ) diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift b/Modules/Parser/Sources/OPMLParser/OPMLAttributes.swift similarity index 100% rename from Modules/Parser/Sources/Parser/OPML/OPMLAttributes.swift rename to Modules/Parser/Sources/OPMLParser/OPMLAttributes.swift diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift b/Modules/Parser/Sources/OPMLParser/OPMLDocument.swift similarity index 100% rename from Modules/Parser/Sources/Parser/OPML/OPMLDocument.swift rename to Modules/Parser/Sources/OPMLParser/OPMLDocument.swift diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift b/Modules/Parser/Sources/OPMLParser/OPMLFeedSpecifier.swift similarity index 98% rename from Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift rename to Modules/Parser/Sources/OPMLParser/OPMLFeedSpecifier.swift index 2b5e43856..d22f566b1 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLFeedSpecifier.swift +++ b/Modules/Parser/Sources/OPMLParser/OPMLFeedSpecifier.swift @@ -6,6 +6,7 @@ // import Foundation +import SAX public struct OPMLFeedSpecifier: Sendable { diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift 
b/Modules/Parser/Sources/OPMLParser/OPMLItem.swift similarity index 95% rename from Modules/Parser/Sources/Parser/OPML/OPMLItem.swift rename to Modules/Parser/Sources/OPMLParser/OPMLItem.swift index a00a4ce35..2f0e972e9 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLItem.swift +++ b/Modules/Parser/Sources/OPMLParser/OPMLItem.swift @@ -27,8 +27,9 @@ public class OPMLItem { if let feedURL = attributes?.opml_xmlUrl { self.feedSpecifier = OPMLFeedSpecifier(title: self.titleFromAttributes, feedDescription: attributes?.opml_description, homePageURL: attributes?.opml_htmlUrl, feedURL: feedURL) + } else { + self.feedSpecifier = nil } - } func add(_ item: OPMLItem) { diff --git a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift similarity index 59% rename from Modules/Parser/Sources/Parser/OPML/OPMLParser.swift rename to Modules/Parser/Sources/OPMLParser/OPMLParser.swift index 0ecda56e9..578610468 100644 --- a/Modules/Parser/Sources/Parser/OPML/OPMLParser.swift +++ b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift @@ -6,13 +6,16 @@ // import Foundation +import SAX public final class OPMLParser { - private let url: String - private let data: Data + private let parserData: ParserData + private var data: Data { + parserData.data + } - private let opmlDocument: OPMLDocument + private var opmlDocument: OPMLDocument? private var itemStack = [OPMLItem]() private var currentItem: OPMLItem? { @@ -28,26 +31,26 @@ public final class OPMLParser { public static func document(with parserData: ParserData) -> OPMLDocument? { let opmlParser = OPMLParser(parserData) - return opmlParser.parse() + opmlParser.parse() + return opmlParser.opmlDocument } init(_ parserData: ParserData) { - self.url = parserData.url - self.data = parserData.data - self.opmlDocument = OPMLDocument(url: parserData.url) + self.parserData = parserData } } private extension OPMLParser { - func parse() -> OPMLDocument? 
{ + func parse() { guard canParseData() else { - return nil + return } - pushItem(opmlDocument) + opmlDocument = OPMLDocument(url: parserData.url) + push(opmlDocument!) let saxParser = SAXParser(delegate: self, data: data) saxParser.parse() @@ -70,20 +73,20 @@ private extension OPMLParser { return } - itemStack.dropLast() + _ = itemStack.dropLast() } } extension OPMLParser: SAXParserDelegate { - func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { + public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { - if SAXEqualStrings(localName, XMLKey.title) { + if SAXEqualTags(localName, XMLKey.title) { saxParser.beginStoringCharacters() return } - if !SAXEqualStrings(localName, XMLKey.outline) { + if !SAXEqualTags(localName, XMLKey.outline) { return } @@ -94,21 +97,21 @@ extension OPMLParser: SAXParserDelegate { push(item) } - func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { - if SAXEqualStrings(localname, XMLKey.title) { + if SAXEqualTags(localName, XMLKey.title) { if let item = currentItem as? OPMLDocument { item.title = saxParser.currentStringWithTrimmedWhitespace } return } - if SAXEqualStrings(localName, XMLKey.outline) { + if SAXEqualTags(localName, XMLKey.outline) { popItem() } } - func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { + public func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { // Nothing to do, but method is required. 
} diff --git a/Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift b/Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift index 052ca6d9c..036ef3cd7 100644 --- a/Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift +++ b/Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift @@ -7,7 +7,7 @@ import Foundation -extension Data { +public extension Data { /// Return true if the data contains a given String. /// diff --git a/Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift b/Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift index 828316931..8cd83b803 100644 --- a/Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift +++ b/Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift @@ -7,7 +7,7 @@ import Foundation -extension Dictionary where Key == String, Value == String { +public extension Dictionary where Key == String, Value == String { func object(forCaseInsensitiveKey key: String) -> String? { diff --git a/Modules/Parser/Sources/SAX/Extensions/String+SAX.swift b/Modules/Parser/Sources/SAX/Extensions/String+SAX.swift index 61555a365..8a5882498 100644 --- a/Modules/Parser/Sources/SAX/Extensions/String+SAX.swift +++ b/Modules/Parser/Sources/SAX/Extensions/String+SAX.swift @@ -8,7 +8,7 @@ import Foundation -extension String { +public extension String { var nilIfEmptyOrWhitespace: String? { return self.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? 
nil : self diff --git a/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift b/Modules/Parser/Sources/SAX/HTMLTag.swift similarity index 91% rename from Modules/Parser/Sources/Parser/HTML/HTMLTag.swift rename to Modules/Parser/Sources/SAX/HTMLTag.swift index 1ef7f9ad6..1333d9cff 100644 --- a/Modules/Parser/Sources/Parser/HTML/HTMLTag.swift +++ b/Modules/Parser/Sources/SAX/HTMLTag.swift @@ -9,7 +9,7 @@ import Foundation public struct HTMLTag: Sendable { - public enum TagType { + public enum TagType: Sendable { case link case meta } diff --git a/Modules/Parser/Sources/SAX/ParserData.swift b/Modules/Parser/Sources/SAX/ParserData.swift index 1ef7e822a..67fc71a3f 100644 --- a/Modules/Parser/Sources/SAX/ParserData.swift +++ b/Modules/Parser/Sources/SAX/ParserData.swift @@ -9,8 +9,8 @@ import Foundation public struct ParserData: Sendable { - let url: String - let data: Data + public let url: String + public let data: Data public init(url: String, data: Data) { self.url = url diff --git a/Modules/Parser/Sources/SAX/SAXParser.swift b/Modules/Parser/Sources/SAX/SAXParser.swift index 160395f8d..5d8c6244e 100644 --- a/Modules/Parser/Sources/SAX/SAXParser.swift +++ b/Modules/Parser/Sources/SAX/SAXParser.swift @@ -8,9 +8,9 @@ import Foundation import libxml2 -typealias XMLPointer = UnsafePointer +public typealias XMLPointer = UnsafePointer -protocol SAXParserDelegate { +public protocol SAXParserDelegate { func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) @@ -19,11 +19,11 @@ protocol SAXParserDelegate { func saxParser(_: SAXParser, xmlCharactersFound: XMLPointer, count: Int) } -final class SAXParser { +public final class SAXParser { fileprivate let delegate: SAXParserDelegate - var currentCharacters: Data? { // UTF-8 encoded + public var currentCharacters: Data? 
{ // UTF-8 encoded guard storingCharacters else { return nil @@ -33,7 +33,7 @@ final class SAXParser { // Conveniences to get string version of currentCharacters - var currentString: String? { + public var currentString: String? { guard let d = currentCharacters, !d.isEmpty else { return nil @@ -41,7 +41,7 @@ final class SAXParser { return String(data: d, encoding: .utf8) } - var currentStringWithTrimmedWhitespace: String? { + public var currentStringWithTrimmedWhitespace: String? { guard let s = currentString else { return nil @@ -53,13 +53,13 @@ final class SAXParser { private var storingCharacters = false private var characters = Data() - init(delegate: SAXParserDelegate, data: Data) { + public init(delegate: SAXParserDelegate, data: Data) { self.delegate = delegate self.data = data } - func parse() { + public func parse() { guard !data.isEmpty else { return @@ -69,7 +69,7 @@ final class SAXParser { xmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NOENT.rawValue)) data.withUnsafeBytes { bufferPointer in - if let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress { + if let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress { xmlParseChunk(context, bytes, Int32(data.count), 0) } } @@ -79,7 +79,7 @@ final class SAXParser { } /// Delegate can call from xmlStartElement. Characters will be available in xmlEndElement as currentCharacters property. Storing characters is stopped after each xmlEndElement. - func beginStoringCharacters() { + public func beginStoringCharacters() { storingCharacters = true characters.count = 0 @@ -91,7 +91,7 @@ final class SAXParser { characters.count = 0 } - func attributesDictionary(_ attributes: UnsafePointer?, attributeCount: Int) -> [String: String]? { + public func attributesDictionary(_ attributes: UnsafePointer?, attributeCount: Int) -> [String: String]? 
{ guard attributeCount > 0, let attributes else { return nil @@ -154,7 +154,7 @@ private extension SAXParser { } } -private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafePointer?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer?) { +private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer?) { guard let context, let name else { return @@ -194,8 +194,9 @@ nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = { var handler = xmlSAXHandler() handler.characters = charactersFound - handler.startElement = startElement - handler.endElement = endElement + handler.startElementNs = startElement + handler.endElementNs = endElement + handler.initialized = XML_SAX2_MAGIC return handler }() diff --git a/Modules/Parser/Sources/SAX/SAXUtilities.swift b/Modules/Parser/Sources/SAX/SAXUtilities.swift index bbc2b9352..cdc2fdc27 100644 --- a/Modules/Parser/Sources/SAX/SAXUtilities.swift +++ b/Modules/Parser/Sources/SAX/SAXUtilities.swift @@ -8,11 +8,26 @@ import Foundation import libxml2 -func SAXEqualStrings(_ s1: XMLPointer, _ s2: XMLPointer, length: Int? = nil) -> Bool { +public func SAXEqualTags(_ localName: XMLPointer, _ tag: ContiguousArray) -> Bool { - if let length { - return xmlStrncmp(s1, s2, Int32(length)) == 0 + return tag.withUnsafeBufferPointer { bufferPointer in + + let tagCount = tag.count + + for i in 0.. 0 - if !isFolder && (item.attributes! 
as NSDictionary).opml_title == "Skip" { + var isFolder = item.items != nil && item.items!.count > 0 + if !isFolder && item.attributes?.opml_title == "Skip" { isFolder = true } @@ -70,10 +72,17 @@ private extension OPMLTests { XCTAssertNil(feedSpecifier) } - if item.children != nil && item.children!.count > 0 { - for oneItem in item.children! { + if item.items != nil && item.items!.count > 0 { + for oneItem in item.items! { recursivelyCheckOPMLStructure(oneItem) } } } } + +func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { + let filename = "Resources/\(filename)" + let path = Bundle.module.path(forResource: filename, ofType: fileExtension)! + let data = try! Data(contentsOf: URL(fileURLWithPath: path)) + return ParserData(url: url, data: data) +} diff --git a/Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss b/Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss new file mode 100755 index 000000000..ba3d1400a --- /dev/null +++ b/Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss @@ -0,0 +1,2278 @@ + + +Daring Fireball +By John Gruber + + +http://daringfireball.net/feeds/main + +2016-02-28T21:06:52ZCopyright © 2016, John Gruber + Apple Product Event: Monday March 21 + + + + tag:daringfireball.net,2016:/linked//6.32173 + 2016-02-27T21:59:47Z + 2016-02-27T22:39:17Z + + John Gruber + http://daringfireball.net/ + + Kara Swisher, writing at Recode, broke the news:

+ +
+

Attention Apple nerds, investors, media and everyone else who +needs to know when Tim Cook’s next product event is going to be +held: It’s going to be the week of March 21.

+ +

Or to put it another way, it’s not going to be on March 15, the +time frame that other outlets previously reported, according to +several sources. It is not clear if the event was moved or if this +was the same timing as Apple had always planned.

+
+ +

Swisher doesn’t have the exact date, although the <title> tag on her story reads “Apple Product Event Will Be Held March 22”. John Paczkowski (who usually gets these leaks first), confirms the week change, and says the event will be on Monday 21 March:

+ +
+

Sources in position to know say the company has settled on March +21st as the date it will show off a handful of new products. These +people declined to say why Apple postponed the date by a week, but +it’s worth noting that it is one day prior to the company’s March +22 showdown with the government over a motion to compel it to help +hack the iPhone used by one of the San Bernardino terrorists.

+
+ +

For what it’s worth, last year’s March event was on a Monday as well.

+ +

Update: Jim Dalrymple:

+ +
+

This sounds right to me.

+
+ + + + ]]>
+
+ Manuscripts and Findings + + + + tag:daringfireball.net,2016:/linked//6.32172 + 2016-02-27T00:11:11Z + 2016-02-27T00:13:11Z + + John Gruber + http://daringfireball.net/ + + My thanks to Nucleobytes for sponsoring this week’s DF RSS feed. Nucleobytes is a fascinating company. They specialize in creating Mac and iOS software for scientists and researchers, and they do it with great style — their apps have won multiple Apple Design Awards.

+ +

Their latest creations are two apps for researchers, useful for anyone who researches anything from lab results, cooking recipes, or research for blog posts: Manuscripts and Findings.

+ +
    +
  • Manuscripts is a writing tool that helps you concentrate on your story. Outline, plan and edit your project, insert figures, tables and math, then format citations using a killer workflow. Manuscripts supports both importing and exporting Markdown, Word, LaTeX, and HTML.

  • +
  • Findings is a lab notebook app that helps you keep a journal of your research, connected to notes, photos, and files. Plan your week, track progress, and share your findings with your colleagues or the world.

  • +
+ +

Try the free basic versions, and use coupon DARINGFIREBALL for a special discount on the unlimited versions, this week only. (They have an even better offer for students.)

+ + + + ]]>
+
+ Donald Trump Vows to ‘Open Up’ Libel Laws + + + + tag:daringfireball.net,2016:/linked//6.32171 + 2016-02-26T21:47:27Z + 2016-02-26T21:47:28Z + + John Gruber + http://daringfireball.net/ + + Hadas Gold, writing for Politico:

+ +
+

During a rally in Fort Worth, Texas, Trump began his usual tirade +against newspapers such as The New York Times and The Washington +Post, saying they’re “losing money” and are “dishonest.” The +Republican presidential candidate then took a different turn, +suggesting that when he’s president they’ll “have problems.”

+ +

“One of the things I’m going to do if I win, and I hope we do and +we’re certainly leading. I’m going to open up our libel laws so +when they write purposely negative and horrible and false +articles, we can sue them and win lots of money. We’re going to +open up those libel laws. So when The New York Times writes a hit +piece which is a total disgrace or when The Washington Post, which +is there for other reasons, writes a hit piece, we can sue them +and win money instead of having no chance of winning because +they’re totally protected,” Trump said.

+
+ +

Not worrisome at all. No sir.

+ + + + ]]>
+
+ Most Android Phones Are Not Encrypted + + + + tag:daringfireball.net,2016:/linked//6.32170 + 2016-02-26T17:43:11Z + 2016-02-28T21:06:52Z + + John Gruber + http://daringfireball.net/ + + Jose Pagliery, writing for CNN Money:

+ +
+

Although 97% of Android phones have encryption as an option, less +than 35% of them actually got prompted to turn it on when they +first activated the phone. Even then, not everybody chooses that +extra layer of security.

+ +

A Google spokesman said that encryption is now required for all +“high-performing devices” — like the Galaxy S7 — running the +latest version of Android, Marshmallow. But only 1.2% of Android +phones even have that version, according to Google.

+ +

By comparison, most Apple products are uniformly secure: 94% of +iPhones run iOS 8 or 9, which encrypt all data. Apple (AAPL, +Tech30) makes its devices, designs the software, and retains full +control of the phone’s operating system.

+ +

“If a person walks into a Best Buy and walks out with an iPhone, +it’s encrypted by default. If they walk out with an Android phone, +it’s largely vulnerable to surveillance,” said Christopher +Soghoian, the principal technologist at the American Civil +Liberties Union.

+
+ +

Google is moving in the right direction, but here’s an area where the slow uptake of new versions of Android has a serious effect.

+ + + + ]]>
+
+ 9to5Mac: ‘Apple Likely to Drop the “5”, Call New 4-Inch Model the “iPhone SE”’ + + + + tag:daringfireball.net,2016:/linked//6.32169 + 2016-02-26T17:24:11Z + 2016-02-26T18:32:34Z + + John Gruber + http://daringfireball.net/ + + Mark Gurman:

+ +
+

In January, we reported that Apple is preparing a new 4-inch +iPhone that is essentially 2013’s iPhone 5s with upgraded +internals. At the time, we heard that Apple would call the device +the “iPhone 5se” based on it being both an enhanced and “special +edition” version of the iPhone 5s. Now, we are hearing that Apple +appears to be going all in on the special edition factor: sources +say that Apple has decided to drop the “5” from the device’s name +and simply call it the “iPhone SE.” This will mark the first +iPhone upgrade without a number in its name and would logically +remove it from a yearly update cycle.

+
+ +

A few points:

+ +
    +
  • Apple was never going to call this phone the “5 SE”. I don’t know where Gurman got that, but that was never going to happen. Why would Apple give a new phone a name that makes it sound old?

  • +
  • Isn’t it more accurate to think of this as an iPhone 6S in a 4-inch body than as an iPhone 5S with “upgraded internals”? Other than the display, aren’t the “internals” the defining characteristics of any iPhone?

  • +
  • Dropping the number entirely fits with my theory that this phone is intended to remain on the market for 18-24 months.

  • +
+ + + + ]]>
+
+ Gogo Wi-Fi and Email Security + + + + tag:daringfireball.net,2016:/linked//6.32168 + 2016-02-26T17:12:34Z + 2016-02-26T19:00:17Z + + John Gruber + http://daringfireball.net/ + + Reporter Steven Petrow published a scary first-hand tale in USA Today, claiming that his email was hacked by another passenger on a Gogo-enabled flight. The implication was that you shouldn’t use email on Gogo unless you’re using a VPN.

+ +

But Petrow’s email didn’t get intercepted because of some flaw with Gogo. It got intercepted because he wasn’t connecting to the POP or SMTP servers via SSL. In fact, his email provider, Earthlink, doesn’t even support SSL for email.

+ +

Robert Graham at Errata Security explains:

+ +
+

Early Internet stuff wasn’t encrypted, because encryption was +hard, and it was hard for bad guys to tap into wires to eavesdrop. +Now, with open WiFi hotspots at Starbucks or on the airplane, it’s +easy for hackers to eavesdrop on your network traffic. +Simultaneously, encryption has become a lot easier. All new +companies, those still fighting to acquire new customers, have +thus upgraded their infrastructure to support encryption. Stagnant +old companies, who are just milking their customers for profits, +haven’t upgraded their infrastructure.

+ +

You see this in the picture below. Earthlink supports older +un-encrypted “POP3” (for fetching email from the server), but not +the new encrypted POP3 over SSL. Conversely, GMail doesn’t support +the older un-encrypted stuff (even if you wanted it to), but only +the newer encrypted version.

+
+ +

Gogo is far from perfect, but it certainly wasn’t at fault in this case.

+ +

Update: Like a lot of you, I’m not even sure I buy the whole story. Whole thing seems fishy.

+ + + + ]]>
+
+ Google, Facebook, Twitter, and Microsoft Plan to Support Apple + + + + tag:daringfireball.net,2016:/linked//6.32167 + 2016-02-25T22:56:47Z + 2016-02-25T22:56:48Z + + John Gruber + http://daringfireball.net/ + + Deepa Seetharaman and Jack Nicas, reporting for the WSJ:

+ +
+

Several tech companies, including Google parent Alphabet Inc., +Facebook Inc. and Microsoft Corp., plan to file a joint motion +supporting Apple Inc. in its court fight against the Justice +Department over unlocking an alleged terrorist’s iPhone, according +to people familiar with the companies’ plans.

+ +

At least one other tech company plans to be included in a joint +amicus brief next week generally supporting Apple’s position that +unlocking the iPhone would undermine tech firms’ efforts to +protect their users’ digital security, these people said. Twitter +Inc. also plans to support Apple in a motion, though it is unclear +if it will join the combined filing, another person familiar said.

+ +

Microsoft President and Chief Legal Officer Brad Smith told +Congress on Thursday that his company would file a motion +supporting Apple.

+
+ +

Nice.

+ + + + ]]>
+
+ Apple’s Motion to Vacate FBI Order + + + + tag:daringfireball.net,2016:/linked//6.32166 + 2016-02-25T20:24:56Z + 2016-02-25T20:25:28Z + + John Gruber + http://daringfireball.net/ + + A clear, cogent read. I often shy away from reading legal motions because they’re so often written in dense legalese, but this one is clear.

+ +

This stuck out to me:

+ +
+

Congress knows how to impose a duty on third parties to facilitate +the government’s decryption of devices. Similarly, it knows +exactly how to place limits on what the government can require of +telecommunications carriers and also on manufacturers of telephone +equipment and handsets. And in CALEA, Congress decided not to +require electronic communication service providers, like Apple, to +do what the government seeks here. Contrary to the government’s +contention that CALEA is inapplicable to this dispute, Congress +declared via CALEA that the government cannot dictate to providers +of electronic communications services or manufacturers of +telecommunications equipment any specific equipment design or +software configuration.

+ +

In the section of CALEA entitled “Design of features and systems +configurations,” 47 U.S.C. § 1002(b)(1), the statute says that it +“does not authorize any law enforcement agency or officer —

+ +
+

(1) to require any specific design of equipment, facilities, + services, features, or system configurations to be adopted by + any provider of a wire or electronic communication service, + any manufacturer of telecommunications equipment, or any + provider of telecommunications support services.

+ +

(2) to prohibit the adoption of any equipment, facility, service, + or feature by any provider of a wire or electronic + communication service, any manufacturer of telecommunications + equipment, or any provider of telecommunications support + services.

+
+
+ +

What Apple is arguing is that the All Writs Act is intended only to fill the gaps covering scenarios not covered by other laws, but CALEA (the Communications Assistance for Law Enforcement Act) is a law that was passed specifically to cover exactly this sort of scenario. This strikes me as a very compelling argument.

+ + + + ]]>
+
+ Microsoft Will File Amicus Brief Supporting Apple + + + + tag:daringfireball.net,2016:/linked//6.32165 + 2016-02-25T18:59:14Z + 2016-02-25T18:59:15Z + + John Gruber + http://daringfireball.net/ + + Dina Bass, reporting for Bloomberg:

+ +
+

Microsoft Corp. backs Apple Inc. in its fight with the U.S. +government over unlocking a terrorist’s iPhone, said President and +Chief Legal Officer Brad Smith.

+ +

The company will file an amicus brief to support Apple next week, +Smith said at a congressional hearing to discuss the need for new +legislation to govern privacy, security and law enforcement in the +age of Internet-based cloud services.

+
+ +

Nice.

+ + + + ]]>
+
+ Apple to Tighten iCloud Backup Encryption + + + + tag:daringfireball.net,2016:/linked//6.32164 + 2016-02-25T18:02:44Z + 2016-02-25T18:02:45Z + + John Gruber + http://daringfireball.net/ + + Tim Bradshaw, reporting for the Financial Times:

+ +
+

Apple is working on new ways to strengthen the encryption of +customers’ iCloud backups in a way that would make it impossible +for the company to comply with valid requests for data from law +enforcement, according to people familiar with its plans.

+ +

The move would bolster Apple customers’ security against hackers +but also frustrate investigators who are currently able to obtain +data from Apple’s servers through a court order. Apple has +complied with thousands of such orders in the past.

+ +

Developing such technology is in some ways more complex than +adding the kind of device-level security that Apple introduced to +the iPhone in 2014 with its iOS 8 update.

+ +

Building new protections that mean Apple no longer has access to +iCloud encryption keys may inconvenience some customers. Such a +change would most likely mean that customers who forget their +iCloud password may be left unable to access their photos, +contacts and other personal information that is backed up to +Apple’s systems.

+
+ + + + ]]>
+
+ The Dangerous All Writs Act Precedent in the Apple Encryption Case + + + + tag:daringfireball.net,2016:/linked//6.32163 + 2016-02-25T17:07:13Z + 2016-02-25T17:07:15Z + + John Gruber + http://daringfireball.net/ + + Amy Davidson, writing for The New Yorker:

+ +
+

It is essential to this story that the order to Apple is not a +subpoena: it is issued under the All Writs Act of 1789, which says +that federal courts can issue “all writs necessary or appropriate +in aid of their respective jurisdictions and agreeable to the +usages and principles of law.” Read as a whole, this simply means +that judges can tell people to follow the law, but they have to do +so in a way that, in itself, respects the law. The Act was written +at a time when a lot of the mechanics of the law still had to be +worked out. But there are qualifications there: warnings about the +writs having to be “appropriate” and “agreeable,” not just to the +law but to the law’s “principles.” The government, in its use of +the writ now, seems to be treating those caveats as background +noise. If it can tell Apple, which has been accused of no +wrongdoing, to sit down and write a custom operating system for +it, what else could it do?

+
+ +

Lost amid the technical debate over encryption is the legal debate over this incredibly broad application of the All Writs Act.

+ + + + ]]>
+
+ Twitter’s Missing Manual + + + + tag:daringfireball.net,2016:/linked//6.32162 + 2016-02-25T16:45:49Z + 2016-02-25T16:45:50Z + + John Gruber + http://daringfireball.net/ + + Eevee:

+ +
+

Here, then, is a list of all the non-obvious things about Twitter +that I know. Consider it both a reference for people who aren’t up +to their eyeballs in Twitter, and an example of how these hidden +features can pile up. I’m also throwing in a couple notes on +etiquette, because I think that’s strongly informed by the shape +of the platform.

+
+ + + + ]]>
+
+ Sharp Accepts Foxconn Takeover Bid + + + + tag:daringfireball.net,2016:/linked//6.32161 + 2016-02-25T05:21:30Z + 2016-02-25T17:46:42Z + + John Gruber + http://daringfireball.net/ + + Huge news for both companies. Interesting for Apple, too.

+ +

Update:

+ +
+

A deal to take over Japanese electronics giant Sharp by Taiwanese +manufacturer Foxconn, has been thrown into question by a last +minute delay.

+ +

Foxconn said it had received new information from Sharp which +needed to be clarified.

+
+ +

Whoops.

+ + + + ]]>
+
+ The Next Step in iPhone Impregnability + + + + tag:daringfireball.net,2016:/linked//6.32160 + 2016-02-25T03:26:27Z + 2016-02-25T04:35:17Z + + John Gruber + http://daringfireball.net/ + + Matt Apuzzo and Katie Benner, reporting for the NYT:

+ +
+

Apple engineers have already begun developing new security +measures that would make it impossible for the government to break +into a locked iPhone using methods similar to those now at the +center of a court fight in California, according to people close +to the company and security experts.

+ +

If Apple succeeds in upgrading its security — and experts say it +almost surely will — the company would create a significant +technical challenge for law enforcement agencies, even if the +Obama administration wins its fight over access to data stored on +an iPhone used by one of the killers in last year’s San +Bernardino, Calif., rampage. The F.B.I. would then have to find +another way to defeat Apple security, setting up a new cycle of +court fights and, yet again, more technical fixes by Apple. […]

+ +

Apple built its recent operating systems to protect customer +information. As its chief executive, Timothy D. Cook, wrote in a +recent letter to customers, “We have even put that data out of our +own reach, because we believe the contents of your iPhone are none +of our business.”

+ +

But there is a catch. Each iPhone has a built-in troubleshooting +system that lets the company update the system software without +the need for a user to enter a password. Apple designed that +feature to make it easier to repair malfunctioning phones.

+
+ +

The way the iPhone works today, when put into recovery mode you can restore the operating system without entering the device passcode. The only restriction is that the version of iOS to be installed must be properly signed by Apple.

+ +

I just tried it here with my old iPhone 6, which had been turned off for weeks. I powered it up, but did not unlock it. I put it in recovery mode, and then updated it to iOS 9.3 beta 4. Then it restarted. Now it’s running iOS 9.3 beta 4, and I still have not unlocked it. All my data is still on the phone — but it’s running a new version of iOS, without my having unlocked it.

+ +

What the FBI wants Apple to do is create (and sign) a new version of iOS that they can force the San Bernardino suspect’s phone to install as an update — and this new version of iOS will allow them to easily brute-force the passcode.

+ +

I think what Apple is leaking here is that they’re going to change this (perhaps as soon as this year’s new iPhone 7), so that you can’t install a new version of iOS, even in recovery mode, without entering the device’s passcode. (I think they will also do the same for firmware updates to the code that executes on the Secure Enclave — it will require a passcode lock.)

+ +

If you do a full restore, you can install a new version of the OS without the passcode, but this wipes the data. See also: Activation Lock, which allows you to bypass the passcode to completely wipe an iPhone, but requires you to sign into iCloud before you can use it.

+ + + + ]]>
+
+ Scalia in 1987: ‘The Constitution Sometimes Insulates the Criminality of a Few in Order to Protect the Privacy of Us All’ + + + + tag:daringfireball.net,2016:/linked//6.32159 + 2016-02-25T02:53:17Z + 2016-02-25T02:53:18Z + + John Gruber + http://daringfireball.net/ + + NYT report on a 6-3 Supreme Court decision in 1987:

+ +
+

Justice Scalia’s opinion was forcefully denounced as an +unjustified obstacle to law enforcement in dissenting opinions by +Associate Justices Sandra Day O’Connor and Lewis F. Powell Jr. +Chief Justice Rehnquist joined in both of the dissents.

+ +

Justice Scalia, however, said, “There is nothing new in the +realization that the Constitution sometimes insulates the +criminality of a few in order to protect the privacy of us +all.” […]

+ +

Justice Scalia’s majority opinion today said that although the +search for weapons was lawful — a shot had just been fired through +the floor of the apartment, injuring a man below — the police were +not justified in moving the stereo components even slightly to +check the serial numbers without “probable cause” to believe they +were stolen. He thus affirmed a ruling by an Arizona appellate +court that the stereo components, which turned out to have been +stolen in an armed robbery, could not be used as evidence against +the occupant of the apartment.

+ +

Associate Justice William J. Brennan Jr., the Court’s senior +member, who is its leading liberal, apparently assigned Justice +Scalia to write the majority opinion, which he joined. Under the +Supreme Court’s procedures, the Chief Justice assigns opinions +when he is in the majority. When the Chief Justice dissents, as +in the Arizona case, the senior member of the majority has +assignment power.

+
+ +

Conservative judges, as a general rule, tend to side with law enforcement in search and seizure cases. Scalia was certainly a conservative, but by no means was he in lockstep with them.

+ + + + ]]>
+
+ ABC News Posts Extensive Interview With Tim Cook on FBI/iPhone Case + + + + tag:daringfireball.net,2016:/linked//6.32158 + 2016-02-25T00:59:51Z + 2016-02-25T02:17:14Z + + John Gruber + http://daringfireball.net/ + + Solid, thorough, and I think very fair interview by David Muir. Cook made his case about as well as it could be made — a passionate defense of civil liberties. It’s 30 minutes long and worth every minute of it.

+ + + + ]]>
+
+ Former Bush Administration Official Argues Supreme Court Should Count Scalia’s Vote in Pending Cases + + + + tag:daringfireball.net,2016:/linked//6.32157 + 2016-02-24T22:46:11Z + 2016-02-24T22:46:12Z + + John Gruber + http://daringfireball.net/ + + This is how we get from here to there.

+ + + + ]]>
+
+ David Ortiz Makes a Final Plea to Yankees Fans + + + + tag:daringfireball.net,2016:/linked//6.32156 + 2016-02-24T22:02:58Z + 2016-02-24T22:17:46Z + + John Gruber + http://daringfireball.net/ + + Kevin Kernan, writing for the NY Post:

+ +
+

When Ortiz, 40, makes his final Yankee Stadium appearance on Sept. +29, this is what he wants, and it speaks volumes about Ortiz the +player, the competitor, the enemy, the star.

+ +

“You know what I want most of all?’’ Big Papi told The Post on +Tuesday at JetBlue Park. “I would love it if the fans at Yankee +Stadium gave me a standing ovation.’’

+ +

That’s what he wants, and that would be the perfect tribute to +Ortiz, who owns 503 home runs.

+
+ +

I would wholeheartedly join in that ovation. Great player, great rival, and his retirement really marks the end of the epic Yankees-Sox rivalry from the early 2000s. I would expect appearances from Derek Jeter, Mariano Rivera, Jorge Posada, and Joe Torre. Just thinking about it makes me want to buy tickets.

+ + + + ]]>
+
+ Spotify Moves Infrastructure to Google Cloud Platform + + + + tag:daringfireball.net,2016:/linked//6.32155 + 2016-02-24T03:01:04Z + 2016-02-24T03:01:06Z + + John Gruber + http://daringfireball.net/ + + You heard it here first: this presages Google acquiring Spotify. (I heard it from Om Malik first.)

+ + + + ]]>
+
+ Was Pew’s Polling Question on the Apple/FBI Debate Misleading? + + + + tag:daringfireball.net,2016:/linked//6.32154 + 2016-02-23T22:16:51Z + 2016-02-23T22:16:52Z + + John Gruber + http://daringfireball.net/ + + Mike Masnick, writing for TechDirt:

+ +
+

The question asked was

+ +
+

As you may know, RANDOMIZE: [the FBI has said that accessing the +iPhone is an important part of their ongoing investigation into +the San Bernardino attacks] while [Apple has said that unlocking +the iPhone could compromise the security of other users’ +information] do you think Apple [READ; RANDOMIZE]?

+ +

(1) Should unlock the iPhone (2) Should not unlock the iPhone (3) + Don’t Know.

+
+ +

But that’s not the issue in this case!

+ +

As noted in the past, when it’s possible for Apple to get access +to data, it has always done so in response to lawful court orders. +That’s similar to almost every other company as well. This case is +different because it’s not asking Apple to “unlock the iPhone.” +The issue is that Apple cannot unlock the iPhone and thus, the FBI +has instead gotten a court order to demand that Apple create an +entirely new operating system that undermines the safety and +security of iPhones, so that the FBI can hack into the iPhone. +That’s a really different thing.

+
+ +

He makes a good point. But when it comes to public polling on an issue like this, you can’t expect the public to understand the technical issues. Ideally, yes, the language used by Pew would have been much more precise. But basically what they were asking is “Do you think Apple should do whatever the FBI wants them to do to get the information from the San Bernardino suspect’s iPhone?” For polling purposes, I don’t think it matters much what “whatever” is.

+ +

It’s true that if phrased differently, it’s quite possible you’d get a poll showing more support for Apple. But the bottom line is that a lot of Americans think Apple should just do what the FBI is asking them to do.

+ + + + ]]>
+
+ On Ribbons and Ribbon Cutters + + + + tag:daringfireball.net,2016:/linked//6.32153 + 2016-02-23T22:00:23Z + 2016-02-23T22:00:24Z + + John Gruber + http://daringfireball.net/ + + Jonathan Zdziarski (who has been killing it with his analysis of the Apple/FBI fight):

+ +
+

With most non-technical people struggling to make sense of the +battle between FBI and Apple, Bill Gates introduced an excellent +analogy to explain cryptography to the average non-geek. Gates +used the analogy of encryption as a “ribbon around a hard drive”. +Good encryption is more like a chastity belt, but since Farook +decided to use a weak passcode, I think it’s fair here to call it +a ribbon. In any case, let’s go with Gates’s ribbon analogy. […]

+ +

Instead of cutting the ribbon, which would be a much simpler task, +FBI is ordering Apple to invent a ribbon cutter — a forensic tool +capable of cutting the ribbon for FBI, and is promising to use it +on just this one phone. In reality, there’s already a line +beginning to form behind Comey should he get his way.

+
+ + + + ]]>
+
+ Apple to Restore UI Navigation With Pencil in Next iOS 9.3 Beta + + + + tag:daringfireball.net,2016:/linked//6.32152 + 2016-02-23T21:37:34Z + 2016-02-23T22:26:45Z + + John Gruber + http://daringfireball.net/ + + That didn’t take long. Apple, in a statement to iMore and a few other publications:

+ +
+

Apple Pencil has been a huge hit with iPad Pro users, who love it +for drawing, annotating and taking notes,” an Apple spokesperson +told iMore. “We believe a finger will always be the primary way +users navigate on an iPad, but we understand that some customers +like to use Apple Pencil for this as well and we’ve been working +on ways to better implement this while maintaining compatibility +during this latest beta cycle. We will add this functionality back +in the next beta of iOS 9.3.

+
+ +

One thing I take away from the vocal reaction to this: the Apple Pencil and iPad Pro have passionate users.

+ + + + ]]>
+
+ Apple vs. FBI: ‘Just This Once’? + + + + tag:daringfireball.net,2016:/linked//6.32151 + 2016-02-23T21:20:18Z + 2016-02-23T21:20:19Z + + John Gruber + http://daringfireball.net/ + + Julian Sanchez, writing for Just Security:

+ +
+

Consider: Possibly the next iPhone simply eliminates Apple’s +ability to assist in any way. But it’s hard to imagine a +scenario where the designer and key-holder for a device designed +to be used by normal humans can do literally nothing, at the +margin, to assist an attacker. That means every improvement in +device security involves a gamble: Maybe the cost of developing +new ways to attack the newly hardened device becomes so high that +the courts recognize it as an “undue burden” and start quashing +(or declining to issue) All Writs Act orders to compel hacking +assistance. Maybe. But Apple is a very large, very rich company, +and much of the practical “burden” comes from the demands of +complying securely and at scale. The government will surely +continue arguing in future cases that the burden of complying +just this one time are not so great for a huge tech company like +Apple. (And, to quote The Smiths, they’ll never never do it +again — of course they won’t; not until the next time.)

+
+ +

Sanchez makes an interesting point here about Apple being disincentivized from improving iPhone security if they lose this case. Imagine if Apple made safes, but the government could compel them to crack their own safes under warrant. The harder they make these safes to crack, the more work they bring upon themselves when compelled to crack them.

+ +

I don’t think Apple would succumb to that and stop improving their device security, but it shows what an untenable position the government is trying to put Apple in. The only easy way out for Apple, if they lose, is to stop making iPhones truly secure.

+ + + + ]]>
+
+ High-Profile Attorney Ted Olson Joins Apple’s Fight Against FBI Terror Probe + + + + tag:daringfireball.net,2016:/linked//6.32150 + 2016-02-23T20:49:09Z + 2016-02-23T21:11:40Z + + John Gruber + http://daringfireball.net/ + + Taylor Goldenstein, reporting for the LA Times:

+ +
+

Olson and Theodore J. Boutrous Jr. are the attorneys of record +representing Apple, according to a court filing. Boutrous and +Olson worked together to fight California’s previous ban on +same-sex marriage.

+ +

Olson is best known for successfully arguing on behalf of George +W. Bush in the Supreme Court case Bush vs. Gore, which decided the +2000 presidential election, and for challenging California’s +Proposition 8, the measure that banned gay marriage, before the +Supreme Court.

+
+ +

Olson is truly an extraordinary figure, both in terms of his career (winning landmark cases for conservatives, like Bush v. Gore and Citizens United; then winning the case that legalized gay marriage nationwide), and his personal life (his wife was a passenger on the plane that crashed into the Pentagon on 9/11).

+ + + + ]]>
+
+ iOS 9.3 Betas Remove the Ability to Navigate iPad UI With Apple Pencil + + + + tag:daringfireball.net,2016:/linked//6.32149 + 2016-02-23T19:24:01Z + 2016-02-23T21:39:01Z + + John Gruber + http://daringfireball.net/ + + Serenity Caldwell, at iMore:

+ +
+

Unfortunately, whether by bug or intentional design, the Pencil’s +navigational prowess appears to have vanished in the iOS 9.3 +public betas. With 9.3, you can no longer scroll or manipulate +text; the only places the Pencil works are on canvas or when +pressing digital buttons.

+ +

Normally, I don’t write about beta bugs and features, because it’s +a beta: There are always bugs, and features change. But this +functionality is important enough that I wanted to talk about it +before Apple submits its final 9.3 release. It could be a bug, +yes: But several betas in, we’ve seen fixes for Smart Connector +keyboards and new features, and the Pencil remains crippled. Which +makes me think, more and more, that this is a conscious decision +on the part of Apple’s engineering team. (I did reach out to the +company about the issue, and will update if and when I receive a +response.)

+
+ +

Myke Hurley and CGP Grey talk about this on the latest episode of their podcast, Cortex. Grey says:

+ +
+

Sources in the know confirm that removing the functionality of the +Apple Pencil is a decision inside of Apple. It is not a bug they +have overlooked for three betas. It is a decision.

+
+ +

My only guess as to why Apple would change this is that they want to enable you to scroll/pan (with your finger) while drawing/marking-up with the Pencil. If so, the mistake wasn’t making this change in iOS 9.3 — the mistake was allowing the Pencil to control the UI in the first place.

+ +

I hate to say it, but now that iPad Pro users have gotten used to using the Pencil to navigate the UI, maybe it should be a setting? Maybe under Accessibility? Grey, for example, says using the Pencil to navigate the UI helps him avoid RSI pain.

+ +

Update, two hours later: Apple has told The Verge that UI navigation via Pencil will return in the next iOS 9.3 beta.

+ + + + ]]>
+
+ Bill Gates Breaks Ranks Over FBI Apple Request + + + + tag:daringfireball.net,2016:/linked//6.32148 + 2016-02-23T19:00:59Z + 2016-02-23T19:45:52Z + + John Gruber + http://daringfireball.net/ + + Stephen Foley and Tim Bradshaw, writing for The Financial Times:

+ +
+

“This is a specific case where the government is asking for +access to information. They are not asking for some general +thing, they are asking for a particular case,” Mr Gates told the +Financial Times.

+ +

“It is no different than [the question of] should anybody ever +have been able to tell the phone company to get information, +should anybody be able to get at bank records. Let’s say the bank +had tied a ribbon round the disk drive and said, ‘Don’t make me +cut this ribbon because you’ll make me cut it many times’.”

+
+ +

Gates is so smart — surely he understands that if the FBI prevails, this will set precedent that will be used again and again. It seems to me he’s arguing that we should not be allowed to have devices protected by strong encryption.

+ +

Update: Gates said today he thinks the FT mischaracterized his position, but I’m not really seeing it. He certainly isn’t siding with Apple — his stance seems, at best, lukewarm, like Sundar Pichai’s.

+ + + + ]]>
+
+ Poll Shows More Support for Justice Department Than for Apple + + + + tag:daringfireball.net,2016:/linked//6.32147 + 2016-02-23T18:54:44Z + 2016-02-24T03:22:36Z + + John Gruber + http://daringfireball.net/ + + Pew Research Center:

+ +
+

As the standoff between the Department of Justice and Apple Inc. +continues over an iPhone used by one of the suspects in the San +Bernardino terrorist attacks, 51% say Apple should unlock the +iPhone to assist the ongoing FBI investigation. Fewer Americans +(38%) say Apple should not unlock the phone to ensure the security +of its other users’ information; 11% do not offer an opinion on +the question.

+ +

News about a federal court ordering Apple to unlock the suspect’s +iPhone has registered widely with the public: 75% say they have +heard either a lot (39%) or a little (36%) about the situation.

+
+ +

This is exactly why Apple’s stance on this issue is so commendable. They’re doing what they believe to be right, even though it is unpopular.

+ + + + ]]>
+
+ WSJ: ‘Justice Department Seeks to Force Apple to Extract Data From About 12 Other iPhones’ + + + + tag:daringfireball.net,2016:/linked//6.32146 + 2016-02-23T18:53:40Z + 2016-02-23T18:53:41Z + + John Gruber + http://daringfireball.net/ + + Devlin Barrett, reporting for the WSJ:

+ +
+

The Justice Department is pursuing court orders to make Apple Inc. +help investigators extract data from iPhones in about a dozen +undisclosed cases around the country, in disputes similar to the +current battle over a terrorist’s locked phone, according to a +newly-unsealed court document.

+ +

The other phones are evidence in cases where prosecutors have +sought, as in the San Bernardino, Calif., terror case, to use an +18th-century law called the All Writs Act to compel the company to +help them bypass the passcode security feature of phones that may +hold evidence, according to a letter from Apple which was unsealed +in Brooklyn federal court Tuesday. […]

+ +

The letter doesn’t describe the specific types of criminal +investigations related to those phones, but people familiar with +them said they don’t involve terrorism cases. The 12 cases remain +in a kind of limbo amid the bigger, more confrontational legal +duel between the government and the company over an iPhone seized +in the terror case in California, these people said.

+
+ +

But it’s really just about that one, single iPhone in the San Bernardino case.

+ + + + ]]>
+
+ ‘Absolutely Right’ + + + + tag:daringfireball.net,2016:/linked//6.32145 + 2016-02-23T18:36:27Z + 2016-02-23T18:39:51Z + + John Gruber + http://daringfireball.net/ + + Katie Benner and Matt Apuzzo, reporting for the NYT on whether the FBI’s request for Apple to unlock the San Bernardino shooter’s iPhone will open the door to more such requests:

+ +
+

In a note posted to its website on Monday, Apple reiterated that +the government’s request seems narrow but really isn’t. “Law +enforcement agents around the country have already said they have +hundreds of iPhones they want Apple to unlock if the F.B.I. wins +this case,” the company said.

+ +

To that point, the New York City police commissioner, William J. +Bratton, and the Manhattan district attorney, Cyrus R. Vance Jr., +criticized Apple after it refused to comply with the court order +and said that they currently possessed 175 iPhones that they could +not unlock.

+ +

Charlie Rose recently interviewed Mr. Vance and asked if he would +want access to all phones that were part of a criminal proceeding +should the government prevail in the San Bernardino case.

+ +

Mr. Vance responded: “Absolutely right.”

+
+ + + + ]]>
+
+ Mark Zuckerberg Stole Samsung’s Galaxy S7 Show + + + + tag:daringfireball.net,2016:/linked//6.32144 + 2016-02-23T03:46:31Z + 2016-02-23T03:46:32Z + + John Gruber + http://daringfireball.net/ + + Interesting marriage of convenience. Samsung has hardware but no interesting software. Facebook has interesting software but no hardware.

+ + + + ]]>
+
+ MDM Software Would Have Unlocked San Bernardino Shooter’s iPhone + + + + tag:daringfireball.net,2016:/linked//6.32143 + 2016-02-23T01:18:25Z + 2016-02-23T01:18:27Z + + John Gruber + http://daringfireball.net/ + + CBS News:

+ +
+

If the technology, known as mobile device management, had been +installed, San Bernardino officials would have been able to +remotely unlock the iPhone for the FBI without the theatrics of a +court battle that is now pitting digital privacy rights against +national security concerns.

+ +

The service costs $4 per month per phone.

+ +

Instead, the only person who knew the unlocking passcode for the +phone is the dead gunman, Syed Farook, who worked as an inspector +in the county’s public health department.

+
+ +

I had assumed they weren’t using MDM, but it’s good to have confirmation.

+ + + + ]]>
+
+ FBI Director James Comey Publishes Op-Ed on Apple/Encryption Case + + + + tag:daringfireball.net,2016:/linked//6.32141 + 2016-02-22T21:52:48Z + 2016-02-22T21:52:49Z + + John Gruber + http://daringfireball.net/ + + James Comey, in a brief op-ed published last night by Lawfare:

+ +
+

The San Bernardino litigation isn’t about trying to set a +precedent or send any kind of message. It is about the victims and +justice. Fourteen people were slaughtered and many more had their +lives and bodies ruined. We owe them a thorough and professional +investigation under law. That’s what this is. The American people +should expect nothing less from the FBI.

+
+ +

It is very difficult to take Comey’s opening sentence seriously. Everyone — on both sides of the issues — knows that this is about setting precedent.

+ +
+

The particular legal issue is actually quite narrow. The relief we +seek is limited and its value increasingly obsolete because the +technology continues to evolve. We simply want the chance, with a +search warrant, to try to guess the terrorist’s passcode without +the phone essentially self-destructing and without it taking a +decade to guess correctly. That’s it. We don’t want to break +anyone’s encryption or set a master key loose on the land. I hope +thoughtful people will take the time to understand that. Maybe the +phone holds the clue to finding more terrorists. Maybe it doesn’t. +But we can’t look the survivors in the eye, or ourselves in the +mirror, if we don’t follow this lead.

+
+ +

This is a purely emotional appeal. By Comey’s logic here, FBI agents should be considered above the law, able to pursue any and every avenue possible in the pursuit of information in a case with high stakes. That’s not how our system works. We are governed by the rule of law. Encryption is legal.

+ +

Ultimately, that is where Comey and the FBI are going to take this. They’re going to try to make strong encryption illegal.

+ + + + ]]>
+
+ In Internal Email, Apple CEO Tim Cook Says Refusal to Unlock iPhone Is an Issue of Civil Liberties + + + + tag:daringfireball.net,2016:/linked//6.32140 + 2016-02-22T21:07:23Z + 2016-02-22T21:07:24Z + + John Gruber + http://daringfireball.net/ + + Tim Cook, in a company-wide memo:

+ +
+

Apple is a uniquely American company. It does not feel right to be +on the opposite side of the government in a case centering on the +freedoms and liberties that government is meant to protect.

+ +

Our country has always been strongest when we come together. We +feel the best way forward would be for the government to +withdraw its demands under the All Writs Act and, as some in +Congress have proposed, form a commission or other panel of +experts on intelligence, technology and civil liberties to +discuss the implications for law enforcement, national security, +privacy and personal freedoms. Apple would gladly participate in +such an effort.

+
+ + + + ]]>
+
+ Apple Publishes FAQ on Their Fight Against the FBI + + + + tag:daringfireball.net,2016:/linked//6.32139 + 2016-02-22T21:06:12Z + 2016-02-22T21:06:13Z + + John Gruber + http://daringfireball.net/ + + Cogent.

+ + + + ]]>
+
+ + + +tag:daringfireball.net,2016:/feeds/sponsors//11.32142 + + Daring Fireball Department of Commerce + + +2016-02-22T20:11:01-05:00 +2016-02-22T20:11:03-05:00 + +Everybody does research. Whether you investigate gravitational waves, do fact-finding for blog posts, study the genetics of Rhagoletis pomonella, or design the next revolutionary cupcake recipe, you are a researcher.

+ +

Research needs great tools — that’s where we come in! We build innovative Mac and iOS apps for researchers, that even got us several Apple Design Awards. Today, we present our latest creations: Manuscripts and Findings.

+ +
    +
  • Manuscripts is a writing tool that helps you concentrate on your story. Outline, plan and edit your project, insert figures, tables and math, then format citations using a killer workflow. Import and export of Markdown, Word, LaTeX and HTML is included.

  • +
  • Findings is a lab notebook app that helps you keep a journal of your research, connected to notes, photos and files. Plan your week, track progress, and share your findings with your colleagues… or the world.

  • +
+ +

Try the free basic versions, and use coupon DARINGFIREBALL for a special discount on the unlimited versions, this week only. We hope you’ll like Manuscripts & Findings!

+ +]]>
+[Sponsor] Manuscripts and Findings: Your Research Is Ready for a Big Upgrade
+ The Talk Show: ‘iTools or Whatever’ + + + + tag:daringfireball.net,2016:/linked//6.32138 + 2016-02-21T23:15:40Z + 2016-02-21T23:15:43Z + + John Gruber + http://daringfireball.net/ + + For your enjoyment, a new episode of my podcast, with special guest Jim Dalrymple. Topics include the Apple/FBI legal showdown, the debate over Apple software quality, and more.

+ +

Sponsored by:

+ +
    +
  • Squarespace: Build it beautiful. Use code GRUBER for 10% off your first order.
  • +
  • Fracture: Photos printed in vivid color directly on glass. Use promo code TALKSHOW10 for 10% off your first order.
  • +
  • Harry’s: An exceptional shave at a fraction of the price. Use code TALKSHOW for $5 off your first purchase.
  • +
+ + + + ]]>
+
+ + + + tag:daringfireball.net,2016://1.32137 + 2016-02-21T22:05:40Z + 2016-02-21T22:45:23Z + + John Gruber + http://daringfireball.net/ + +The key point is that you do not have to unlock an iPhone to have it back up to iCloud. But a locked iPhone can’t back up to iCloud if the associated Apple ID password has been changed.

+]]>
+ The latest news in the Apple-FBI legal fight has resulted in much confusion. John Paczkowski, reporting for BuzzFeed:

+ +
+

The FBI has claimed that the password was changed by someone at +the San Bernardino Health Department. Friday night, however, +things took a further turn when the San Bernardino County’s +official Twitter account stated, “The County was working +cooperatively with the FBI when it reset the iCloud password at +the FBI’s request.”

+ +

County spokesman David Wert told BuzzFeed News on Saturday +afternoon the tweet was an authentic statement, but he had nothing +further to add.

+ +

The Justice Department did not respond to requests for comment on +Saturday; an Apple spokesperson said the company had no additional +comment beyond prior statements.

+
+ +

Here is what the FBI wrote in its legal motion, in a footnote on the four ways Apple suggested they obtain the data they seek:

+ +
+

(3) to attempt an auto-backup of the SUBJECT DEVICE with the + related iCloud account (which would not work in this case + because neither the owner nor the government knew the password + the iCloud account, and the owner, in an attempt to gain + access to some information in the hours after the attack, was + able to reset the password remotely, but that had the effect + of eliminating the possibility of an auto-backup);

+
+ +

To unpack this, the “owner” is not Syed Farook, the shooter. The iPhone at the center of this was supplied by Farook’s employer, the San Bernardino County Department of Public Health. They are the “owner”. The “government” is the federal government: the FBI and the Department of Justice.

+ +

The iPhone had been configured to back up to iCloud. However, at the time of the attack, it had not been backed up to iCloud for six weeks. Under warrant, Apple supplied the FBI with the data from that six-week-old backup. The FBI (for obvious reasons) would like the most recent six weeks of data from the phone, too.1

+ +

iCloud backups are triggered automatically when the phone is (a) on a known Wi-Fi network, and (b) plugged-in to power. Apple’s suggestion to the FBI was that if they took the iPhone to Farook’s office and plugged it in, it might trigger a backup. If that had worked, Apple could supply the FBI with the contents of that new backup, including the most recent six weeks of data.

+ +

It is not clear to me from any of the reports I have read why the iPhone had not been backed up in six weeks. It’s possible that Farook had disabled iCloud backups, in which case this whole thing is moot.2 But it’s also possible the only reason the phone hadn’t been backed up in six weeks is that it had not been plugged-in while on a known Wi-Fi network in six weeks. The phone would have to be unlocked to determine this, and the whole point of this fight is that the phone can’t be unlocked.

+ +

The FBI screwed this up by directing the San Bernardino County Department of Public Health to reset Farook’s Apple ID password. They did not, and apparently could not, change anything on the phone itself. But once they reset the Apple ID password, the phone could not back up to iCloud, because the phone needed to be updated with the newly-reset Apple ID password — and they could not do that because they can’t unlock the phone.

+ +

The key point is that you do not have to unlock an iPhone to have it back up to iCloud. But a locked iPhone can’t back up to iCloud if the associated Apple ID password has been changed.

+ +

Again, there are two password-type things at play here. The Apple ID (iCloud) password, and the four-digit device passcode locking the iPhone. The county, at the behest of the FBI, reset the Apple ID password. This did not allow them to unlock the iPhone, and, worse, it prevented the iPhone from initiating a new backup to iCloud.

+ +

How did the county reset Farook’s Apple ID password? We don’t know for sure, but the most likely answer is that if his Apple ID was his work-issued email account, then the IT department at the county could go to iforgot.apple.com, enter Farook’s work email address, and then access his email account to click the confirmation URL to reset the password.

+ +

In short:

+ +
    +
  • The data the FBI claims to want is on Farook’s iPhone.
  • +
  • They already have access to his iCloud account.
  • +
  • They might have been able to transfer the data on his iPhone to his iCloud account via an automated backup, but they can’t because they reset his Apple ID (iCloud) password.
  • +
+ +

The only possible explanations for this are incompetence or dishonesty on the part of the FBI. Incompetence, if they didn’t realize that resetting the Apple ID password could prevent the iPhone from backing up to iCloud. Dishonesty, if they directed the county to do this knowing the repercussions, with the goal of setting up this fight to force Apple to create a back door for them in iOS. I’m not sure which to believe at this point. I’d like to know exactly when this directive to reset the Apple ID password was given — “in the hours after the attack” leaves a lot of wiggle room.

+ +
+
+
    +
  1. +

    Much (or all?) of the data stored on Apple’s iCloud backup servers is not encrypted. Or, if it is encrypted, it is encrypted in a way that Apple can decrypt. Apple has a PDF that describes the information available to U.S. law enforcement from iCloud, but to me it’s not clear exactly what is available under warrant. I would bet a large sum of money that Apple is hard at work on an iCloud backup system that does store data encrypted in a way that Apple cannot read it without the user’s Apple ID password. ↩︎

    +
  2. +
  3. +

    Another possibility: Farook’s iCloud storage was full. If this were the case, presumably Apple could have granted his account additional storage to allow a fresh backup to occur. But again, this became moot as soon as the county reset the Apple ID password at the behest of the FBI. ↩︎︎

    +
  4. +
+
+ + + + ]]>
+ ★ On the San Bernardino Suspect’s Apple ID Password Reset
+ White House Petition to Side With Apple in FBI Fight + + + + tag:daringfireball.net,2016:/linked//6.32136 + 2016-02-21T21:38:07Z + 2016-02-21T21:38:09Z + + John Gruber + http://daringfireball.net/ + + I don’t have high hopes for this (the Obama administration seems hopelessly tied to law enforcement on this subject), but I signed:

+ +
+

The FBI, is demanding that Apple build a “backdoor” to bypass +digital locks protecting consumer information on Apple’s popular +iPhones.

+ +

We the undersigned, oppose this order, which has implications far +beyond the legal case at hand.

+
+ + + + ]]>
+
+ New York Times Publishes Report on iPhone Security and China + + + + tag:daringfireball.net,2016:/linked//6.32135 + 2016-02-20T22:36:43Z + 2016-02-20T22:39:02Z + + John Gruber + http://daringfireball.net/ + + Katie Benner and Paul Mozer, reporting for the NYT and revisiting the topic excised from a report earlier this week:

+ +
+

In China, for example, Apple — like any other foreign company +selling smartphones — hands over devices for import checks by +Chinese regulators. Apple also maintains server computers in +China, but Apple has previously said that Beijing cannot view the +data and that the keys to the servers are not stored in China. In +practice and according to Chinese law, Beijing typically has +access to any data stored in China.

+ +

If Apple accedes to American law enforcement demands for opening +the iPhone in the San Bernardino case and Beijing asks for a +similar tool, it is unlikely Apple would be able to control +China’s use of it. Yet if Apple were to refuse Beijing, it would +potentially face a battery of penalties.

+ +

Analysts said Chinese officials were pushing for greater control +over the encryption and security of computers and phones sold in +the country, though Beijing last year backed off on some proposals +that would have required foreign companies to provide encryption +keys for devices sold in the country after facing pressure from +foreign trade groups.

+ +

“People tend to forget the global impact of this,” said Raman Jit +Singh Chima, policy director at Access Now, a nonprofit that works +for Internet freedoms. “The reality is the damage done when a +democratic government does something like this is massive. It’s +even more negative in places where there are fewer freedoms.”

+
+ +

Another way to look at this is a choice between the lesser of two evils. Is it a bad thing if law enforcement loses access to the contents of cell phones as state of the art for security increases? Yes. But it would be far, far worse — for entirely different reasons — if we eliminate true security by mandating back doors.

+ + + + ]]>
+
+ San Bernardino Officials: Apple ID Password for Terrorist’s iPhone Reset at FBI Request + + + + tag:daringfireball.net,2016:/linked//6.32134 + 2016-02-20T22:14:38Z + 2016-02-20T23:45:31Z + + John Gruber + http://daringfireball.net/ + + This story keeps getting weirder. John Paczkowski, at BuzzFeed:

+ +
+

The FBI has claimed that the password was changed by someone at +the San Bernardino Health Department. Friday night, however, +things took a further turn when the San Bernardino County’s +official Twitter account stated, “The County was working +cooperatively with the FBI when it reset the iCloud password at +the FBI’s request.”

+ +

County spokesman David Wert told BuzzFeed News on Saturday +afternoon the tweet was an authentic statement, but he had nothing +further to add.

+ +

The Justice Department did not respond to requests for comment on +Saturday; an Apple spokesperson said the company had no additional +comment beyond prior statements.

+
+ +

The additional wrinkle here is that when the FBI first revealed this, in this footnote (screenshot) of their legal motion (whole motion linked above, on “claimed”), they strongly implied that the San Bernardino Health Department did this on their own, like they were a bunch of yokels who panicked and did the wrong thing. Instead, it turns out, they were following the FBI’s instructions.

+ +

The FBI says this happened “in the hours after the attack”. My question: How many hours?

+ + + + ]]>
+
+ DevMate by MacPaw + + + + tag:daringfireball.net,2016:/linked//6.32133 + 2016-02-20T00:23:57Z + 2016-02-20T00:23:58Z + + John Gruber + http://daringfireball.net/ + + My thanks to MacPaw for sponsoring this week’s DF RSS feed to announce that their developer platform DevMate is now available free of charge. DevMate is a single SDK that provides a slew of back-end services for Mac developers: in-app purchasing, software licensing, update delivery, crash reports, user feedback, and more. Plus real-time analytics, with sales and downloads, are available from DevMate’s dashboard.

+ +

Among the indie Mac developers using DevMate for their apps are MacPaw themselves (for CleanMyMac), Smile Software, and Realmac. It’s a robust, dependable solution for developers who want to sell their Mac apps outside the App Store.

+ + + + ]]>
+
+ More Mac App Store Certificate Problems + + + + tag:daringfireball.net,2016:/linked//6.32132 + 2016-02-20T00:12:10Z + 2016-02-20T00:19:07Z + + John Gruber + http://daringfireball.net/ + + Lost amid the FBI/iPhone encryption hubbub was another bad week for the Mac App Store — apps just stopped launching, with the only solution being to delete the app(s) and re-install from the store. Michael Tsai (as usual) compiled a thorough roundup of information and commentary.

+ + + + ]]>
+
+ + + + tag:daringfireball.net,2016://1.32054 + 2016-02-04T01:28:15Z + 2016-02-04T16:33:43Z + + John Gruber + http://daringfireball.net/ + +Maybe we expect too much. But Apple’s hardware doesn’t have little problems like this.

+]]>
+ Following up on Walt Mossberg’s column regarding the quality of Apple’s first-party apps, Jim Dalrymple writes:

+ +
+

I understand that Apple has a lot of balls in the air, but they +have clearly taken their eye off some of them. There is absolutely +no doubt that Apple Music is getting better with each update to +the app, but what we have now is more of a 1.0 version than what +we received last year.

+ +

Personally, I don’t care much about all the celebrities that Apple +can parade around — I care about a music service that works. +That’s it.

+ +

If Apple Music (or any of the other software that has +problems) was the iPhone, it would never have been released in +the state it was.

+
+ +

Software and hardware are profoundly different disciplines, so it’s hard to compare them directly. But it seems obvious to me that Apple, institutionally, has higher standards for hardware design and quality than it does for software.

+ +

Maybe this is the natural result of the fact hardware standards must be high, because they can’t issue “hardware updates” over the air like they can with software. But the perception is now widespread that the balance between Apple’s hardware and software quality has shifted in recent years. I see a lot of people nodding their heads in agreement with Mossberg and Dalrymple’s pieces today.

+ +

We went over this same ground a year ago in the wake of Marco Arment’s “Apple Has Lost the Functional High Ground”, culminating in a really interesting (to me at least) discussion with Phil Schiller at my “Live From WWDC” episode of The Talk Show. That we’re still talking about it a year later — and that the consensus reaction is one of agreement — suggests that Apple probably does have a software problem, and they definitely have a perception problem.

+ +

I’ll offer a small personal anecdote. Overall I’ve had great success with iCloud Photo Library. I’ve got over 18,000 photos and almost 400 videos. And I’ve got a slew of devices — iPhones, iPads, and Macs — all using the same iCloud account. And those photos are available from all those devices. Except, a few weeks ago, I noticed that on my primary Mac, in Photos, at the bottom of the main “Photos” view, where it tells you exactly how many photos and videos you have, it said “Unable to Upload 5 Items”. Restarting didn’t fix it. Waiting didn’t fix it. And clicking on it didn’t do anything — I wanted to know which five items couldn’t be uploaded, and why. It seems to me that anybody in this situation would want to know those two things. But damned if Photos would tell me.

+ +

Eventually, I found this support thread which suggested a solution: you can create a Smart Group in Photos using “Unable to upload to iCloud Photo Library” as the matching condition. Bingo: five items showed up. (Two of them were videos for which the original files couldn’t be found; three of them were duplicates of photos that were already in my library.)

+ +

My little iCloud Photo Library syncing hiccup was not a huge deal — I was even lucky insofar as the two videos that couldn’t be found were meaningless. And I managed to find a solution. But it feels emblematic of the sort of nagging software problems people are struggling with in Apple’s apps. Not even the bug itself that led to these five items being unable to upload, but rather the fact that Photos knew about the problem but wouldn’t tell me the details I needed to fix it without my resorting to the very much non-obvious trick of creating a Smart Group to identify them. For me at least, “silent failure” is a big part of the problem — almost everything related to the whole discoveryd/mDNSresponder fiasco last year was about things that just silently stopped working.

+ +

Maybe we expect too much from Apple’s software. But Apple’s hardware doesn’t have little problems like this.

+ + + + ]]>
+ ★ Apple’s App Problem
+ + + + tag:daringfireball.net,2016://1.31994 + 2016-01-21T00:00:17Z + 2016-01-21T00:18:18Z + + John Gruber + http://daringfireball.net/ + +A year ago Apple sold 75 million iPhones in the fourth quarter of calendar 2015. There is no facility in the U.S. that can do that. There might not be anywhere in the world other than China that can operate at that sort of scale.

+]]>
+ Arik Hesseldahl, writing for Recode on Donald Trump’s “we’re gonna get Apple to start building their damn computers and things in this country, instead of in other countries” campaign promise:

+ +
+

Any honest presidential candidate regardless of party should say +clearly and indeed proudly that America doesn’t want these jobs to +come back. Final assembly jobs are low-skilled, low-paying +occupations; no American would wish to support a family on what +the jobs would pay. Workers at China’s Foxconn, which +manufactures the iPhone, make about $402 per month after three +months of on-the-job probation. Even at the lowest minimum wage in +the U.S. — $5.15 an hour in Wyoming — American workers can’t +beat that.

+
+ +

It’s not that simple. These jobs are certainly menial, but they’re not low-skill. As Tim Cook said on 60 Minutes:

+ +
+

Charlie Rose: So if it’s not wages, what is it?

+ +

Tim Cook: It’s skill. […]

+ +

Charlie Rose: They have more skills than American workers? They +have more skills than —

+ +

Tim Cook: Now — now, hold on.

+ +

Charlie Rose: — German workers?

+ +

Tim Cook: Yeah, let me — let me — let me clear, China put an +enormous focus on manufacturing. In what we would call, you and I +would call vocational kind of skills. The U.S., over time, began +to stop having as many vocational kind of skills. I mean, you can +take every tool and die maker in the United States and probably +put them in a room that we’re currently sitting in. In China, you +would have to have multiple football fields.

+ +

Charlie Rose: Because they’ve taught those skills in their +schools?

+ +

Tim Cook: It’s because it was a focus of them — it’s a focus of +their educational system. And so that is the reality.

+
+ +

Wages are a huge factor, but for the sake of argument, let’s say Apple was willing to dip into its massive cash reserves and pay assembly line workers in the U.S. a good wage. Where would these U.S.-made iPhones be assembled? A year ago Apple sold 75 million iPhones in the fourth quarter of calendar 2014. There is no facility in the U.S. that can do that. There might not be anywhere in the world other than China that can operate at that sort of scale. That’s almost one million iPhones per day. 10 iPhones per second. Think about that.

+ +

You can say, well, Apple could dig even deeper into its coffers and build such facilities. And train tens of thousands of employees. But why would they? Part of the marvel of Apple’s operations is that they can assemble and sell an unfathomable number of devices but they’re not on the hook for the assembly plants and facilities. When iPhones go the way of the iPod in 10 or 15 or 20 years, Apple doesn’t have any factories to close or convert for other uses. Foxconn does.

+ +

The U.S. can’t compete with China on wages. It can’t compete on the size of the labor force. China has had a decades-long push in its education system to train these workers; the U.S. has not. And the U.S. doesn’t have the facilities or the proximity to the Asian component manufacturers.

+ +

The only way Apple could ever switch to U.S. assembly and manufacturing would be if they automated the entire process — to build machines that build the machines. That, in fact, is what NeXT did while they were in the hardware business. But NeXT only ever sold about 50,000 computers total. Apple needed to assemble 35,000 iPhones per hour last year.

+ +

So long as assembling these devices remains labor intensive, it has to happen in China. And if someday it becomes automated — if the machines are built by machines — by definition it’s not going to create manufacturing jobs.1

+ +
+
+
    +
  1. +

    I do wonder about the purported Apple car. Would that be assembled in China, too? The U.S. does have automobile manufacturing expertise. And a car is so utterly unlike any product Apple has ever made that I feel like anything is possible. ↩︎

    +
  2. +
+
+ + + + ]]>
+ ★ Why Apple Assembles in China
+ + + + tag:daringfireball.net,2015://1.31881 + 2015-12-11T21:19:40Z + 2015-12-15T00:38:58Z + + John Gruber + http://daringfireball.net/ + +Regarding Apple’s new Smart Battery Case for the iPhone 6/6S.

+]]>
+ Joanna Stern tested Apple’s new Smart Battery Case for five days, and likes it a lot:

+ +
+

Let’s get this out of the way: The bar for battery-case design is +extremely low. Most are chunky and made of black matte plastic, +requiring you to attach two pieces to your phone. You choose a +battery case for utility, not fashion.

+ +

Apple’s Smart Battery Case, though still fairly unsightly, is +ahead of those. Bend back the top and slide in your phone. It +feels just like Apple’s smooth, soft-touch wraparound silicone +case, except… with a protruding, awkward battery on the back. The +battery juts out as if your phone will soon give birth to a +rectangular alien.

+ +

Still, I’ll take it over all the ugly messes sold by Mophie, +Anker and others, especially since it provides better protection +for the phone. A lip curves just above the screen to prevent the +glass from hitting a hard surface and an interior lining provides +better shock absorption than hard plastic. Plus, the grippy +material is much easier to hold and doesn’t feel like it will +slip from my hands.

+
+ +

The Verge’s Lauren Goode disagrees:

+ +
+

Apple’s smart battery case is fine, then, if you want a softer +case or a “passive” battery charging experience, with zero control +over or understanding of how the case actually charges your phone. +Maybe that’s what Apple is hoping: that buyers of this thing will +slip it on and never take it off, charging their iPhones entirely +through the case’s Lightning port going forward, forgetting about +its big ol’ bump in the back. They will be pleased, finally, with +their iPhone 6’s or 6S’s battery life, and the memory of spending +an extra $99 for it, rather than having it just work that way in +the first place, will eventually fade away.

+ +

It’s fine if you don’t want exterior indicator lights, or even a +case that gives you a 0 to 100 percent charge. After all, this one +was designed for the iPhone, by the same company that made your +iPhone. For some people, that’s a big draw.

+ +

In either case this will probably sell like hot cakes. It fits +nicely in holiday stockings. ’Tis the season. Just know that from +a pure performance and even a design perspective, Apple’s effort +is not the best you can get.

+
+ +

(I can almost see her eyes rolling as she typed those italicized words in the second quoted paragraph.)

+ +

Lewis Hilsenteger of Unbox Therapy best captured what most of us thought when we first saw it: “These things look weird.”

+ +

That was certainly my first impression when I got mine Tuesday morning. The looks-like-it’s-pregnant-with-an-iPod-Touch design is certainly curious. I think to understand why it looks like this we have to ask why it even exists:

+ +
    +
  • People who use their phones heavily — power users, if you will — struggle to get through a day on a single charge with the iPhone 6/6S.

  • +
  • The Plus models offer so much more battery life that getting through the day on a single charge isn’t a problem, even for power users who are on their phones all day long. But most people don’t want an iPhone that large.

  • +
  • Apple has long sold third-party battery cases in its stores, so they know how popular they are.

  • +
  • Existing battery cases all suffer from similar design problems, as outlined by Joanna Stern above. They make the entire device look and feel chunky, and most of them are built from materials that don’t feel good. None of them integrate in any way with the software on the iPhone, and most of them use micro USB instead of Lightning for charging the case.

  • +
  • Lastly, Apple claims the Smart Battery Case tackles a problem I wasn’t aware existed: that existing battery cases adversely affect cellular reception because they’re putting a battery between the phone’s antenna and the exterior of the case.

  • +
+ +

So I think Apple’s priorities for the Smart Battery Case were as follows — and the order matters:

+ +
    +
  1. Provides effective battery life equivalent to the iPhone 6S Plus.
  2. +
  3. Feels good in your hand.
  4. +
  5. Makes it easy and elegant to insert and remove the phone.
  6. +
  7. Works as a durable protective case.
  8. +
  9. Prevents the case’s battery from affecting cellular reception.
  10. +
  11. Looks good.
  12. +
+ +

That “looks good” is last on the list is unusual for an Apple product, to say the least. Looking good isn’t always first on Apple’s list of priorities, but it’s seldom far from the top. But in this case it makes sense: Apple sells great-looking silicone and leather cases for people who aren’t looking for a battery case, and all existing third-party battery cases are clunky in some way.

+ +

Ungainly though the case’s hump is, I can’t help but suspect one reason for it might be, counterintuitively, a certain vanity on the part of its designers. Not for the sake of the case itself, but for the iPhone. Third-party “thick from top to bottom” battery cases make it impossible to tell whether the enclosed phone is itself thick or thin. Apple’s Smart Battery Case makes it obvious that it’s a thin iPhone in a case which has a thick battery on the back. And I’ll say this for Apple: they are owning that hump. The hero photo of the case on the packaging is a face-on view of the back of the case.

+ +

But I think the main reasons for this design are practical. The battery doesn’t extend to the top in order to accommodate the hinge design for inserting and removing the phone. Why it doesn’t extend to the bottom is a little less obvious. I suspect one reason is that that’s where the “passively coupling antenna” is.1 Extending the battery to cover it would defeat the purpose. Also, there’s a hand feel aspect to it — normally I rest the bottom of my iPhone on my pinky finger. With this case, I can rest the bottom ridge of the hump on my pinky, and it’s kind of nice. I also like putting my index finger atop the hump.

+ +

So the Smart Battery Case looks weird. Typical battery cases look fat. Whether you prefer the weird look of the Smart Battery Case to the fat look of a typical case is subjective. Me, I don’t like the way any of them look. But after using the Smart Battery Case for three days, and having previously spent time using the thinnest available cases from Mophie, I feel confident saying Apple’s Smart Battery Case feels better when you’re holding it than any other battery case, both because of the material and its shape. It’s not even a close call. It also feels sturdier — this is the most protective iPhone case Apple has ever made, with rigid reinforced sides and a slightly higher lip rising above the touchscreen. The Smart Battery Case also clearly looks better from your own face-on perspective when using the phone. (Mophie’s cases look better than most, but they emboss an obnoxious “mophie” logotype on the front-facing chin. If Apple doesn’t print anything on the front face of the iPhone, why in the world would a case maker?)

+ +

Patents, by the way, are a non-issue regarding the Smart Battery Case’s design. A well-placed little birdie who is perched in a position to know told me that Nilay Patel’s speculation that the unusual design was the byproduct of Apple trying to steer clear of patents held by Mophie (or any other company for that matter) is “absolute nonsense”. This birdie was unequivocal on the matter. Whether you like it, hate it, or are ambivalent about it, this is the battery case Apple wanted to make.

+ +

My take is that the Smart Battery Case is an inelegant design, but it is solving a problem for which, to date, no one has created an elegant solution. Apple has simply chosen to make different severe trade-offs than the existing competition. In that sense, it is a very Apple-like product — like the hockey-puck mouse or the iMac G4.

+ +

On Capacity, Simplicity, and the Intended Use Case

+ +

Most battery cases have an on/off toggle switch, controlling when the case is actually charging the phone. The reason for this is that you can squeeze more from a battery case if you only charge the phone when it’s mostly depleted. Here’s a passage from Mophie’s FAQ page:

+ +
+

When should I turn on my mophie case?

+ +

To get the most charge out of your case, turn it on around 10%-20% +and keep the case charging without using it until your iPhone hits +80% battery life. From there, you can either wait until it gets +low again or top it off when the battery is less than 80%. Apple’s +batteries fast-charge to 80%, then switch to trickle charging for +the last 20%.

+
+ +

Simplicity is a higher priority for Apple than fiddly control. If a peripheral can get by without an on/off switch, Apple is going to omit the switch. (Exhibit B: Apple Pencil.) The whole point of the Smart Battery Case is that you charge it up and put your iPhone in it and that’s it. Complaining about the lack of an on/off toggle or external charge capacity indicator lights on the Smart Battery Case reminds me of the complaints about the original iPhone omitting the then-ubiquitous green/red hardware buttons for starting and ending phone calls. Sure, there was a purpose to them, but in the end the simplification was worth it. If your iPhone is in the case, it’s charging. That’s it.

+ +

Regarding the battery capacity of the case, here’s Lauren Goode, author of the aforelinked review for The Verge, on Twitter:

+ +
+

A quick comparison for you: $99 Apple Battery Case 1877 mAh, +$100 Mophie Juice Pack Air 2750 mAh, $50 Incipio Offgrid Express +3000 mAh

+
+ +

Nothing could better encapsulate the wrong way of looking at the Smart Battery Case than this tweet. The intended use of the Smart Battery Case is to allow prolonged, heavy use of an iPhone 6/6S throughout one day. In my testing, and judging by the reviews of others, its 1,877 mAh battery is enough for that. Adding a bigger battery would have just made it even heavier and more ungainly.

+ +

And the very name of the Incipio Offgrid Express suggests that it is intended for an entirely different use case: traveling away from power for more than a day.

+ +

Which in turn brings me to Tim Cook’s comments to Mashable’s Lance Ulanoff yesterday:

+ +
+

Some also see the introduction of an Apple battery case as an +admission that battery life on the iPhone 6 and 6s isn’t all it +should be.

+ +

Cook, though, said that “if you’re charging your phone every day, +you probably don’t need this at all. But if you’re out hiking and +you go on overnight trips… it’s kind of nice to have.”

+
+ +

The Smart Battery Case would certainly help with an overnight hiking trip, but I think Cook was off-message here, because that scenario is really not what it was designed for. Big 5,000 mAh (or more) external battery chargers (or the highest capacity, extremely thick battery cases from third parties) are far better suited to that scenario than the Smart Battery Case. But Ulanoff’s preceding paragraph points to the marketing predicament inherent in a first-party Apple battery case: that it implies the built-in battery of the iPhone 6S is insufficient.

+ +

The clear lesson is that it’s far better to give a phone more battery life by making the phone itself thicker and including a correspondingly thicker (and thus bigger) internal battery than by using any sort of external battery. After a few days using this case, my thoughts turn not to the Smart Battery Case itself but instead to my personal desire that Apple had made the 6/6S form factor slightly thicker. Not a lot thicker. Just a little — just enough to boost battery life around 15-20 percent or so.2 That wouldn’t completely alleviate the need for external batteries. But it would eliminate a lot of my need — my phone dies only a few times a year, but when it does, it almost invariably happens very late at night.

+ +

I emphasized the word “personal” in the preceding paragraph because I realize my needs and desires are not representative of the majority. I think the battery life of the iPhone 6S as-is is sufficient for the vast majority of typical users. I suspect Cook went with the overnight hiking scenario specifically to avoid the implication that the built-in battery is insufficient. But the better explanation is that the built-in battery is insufficient for power users who use their iPhones far more than most people do.

+ +

My Advice

+ +

If you find yourself short on battery with your iPhone every day (or even most days), and you can’t make an adjustment to, say, put a charging dock on your desk or in your car to give your iPhone’s internal battery a periodic snack, then you should probably bite the bullet and switch to a 6S Plus. However bulky the Plus feels in your pocket and hands, it feels less bulky to me than the iPhone 6S with any battery pack. An iPhone 6S Plus, even with a normal case on it, weighs noticeably less than an iPhone 6S with the Smart Battery Case. If you need the extra battery capacity every day, you might as well get the Plus. (If you actually prefer the bigger Plus to the 4.7-inch devices, you’re in luck — you get the screen size you prefer, and a significantly longer-lasting battery. My advice here is for those who prefer the 4.7-inch size, other considerations aside.)

+ +

That doesn’t describe me, however. On a typical day, my iPhone 6S seldom drops below 20 percent by the time I go to sleep. But when I’m traveling, I often need a portable battery of some sort. Cellular coverage can be spotty (which drains the battery), and when I’m away from home, I tend to do more (or even the entirety) of my daily computing on the iPhone. Conferences, in particular, can be dreadful on battery life. At WWDC my iPhone can drop to 50 percent by the time the keynote is over Monday morning.

+ +

In recent years, rather than use a battery case, I’ve switched to carrying a portable external battery. My favorite for the past year or so is the $80 Mophie Powerstation Plus 2X. It’s relatively small, packs a 3,000 mAh capacity, and has built-in USB and Lightning cables. At conferences or for work travel, it’s easily stashed in my laptop bag, so my pockets aren’t weighed down at all, and my iPhone isn’t saddled with an unnatural case. If I do need to carry it in my pocket, it’s not too bad. It’s also easier to share with friends or family than a battery case. At night, I just plug the Powerstation into an AC adapter, and my iPhone into the Powerstation, and both devices get charged — no need for a separate charger or any additional cables.

+ +

The big advantage to using a battery case instead of an external battery pack is that you can easily keep using your phone while it charges. That’s awkward, at best, while your phone is tethered by a cable to a small brick.

+ +

If I were going to go back to using a battery case, there’s no question in my mind that I’d go with Apple’s. The only downside to it compared to Mophie’s (and the others — but I think Mophie is clearly the leader of the pack) is that it looks funny from the back. But to my eyes it doesn’t look that funny, and though third-party cases don’t look weird, they don’t look (or feel) good. In every other way, Apple’s Smart Battery Case wins: it’s all Lightning, so any Lightning peripherals you have will work, and there’s no need to pack a grody micro USB cable; it supplies more than enough additional power to get you through an active day; its unibody design makes it much easier to insert and remove the phone; and it feels much better in hand.

+ +
+
+
    +
  1. +

    My understanding of how this “passively assistive antenna” works is that it takes the cellular signal and amplifies it as it passes through the case in a way that makes it easier for the iPhone’s antenna to “hear”. Sort of like the antenna equivalent of cupping your hand around your ear. I have no idea whether this is legit, or some sort of placebo marketing bullshit, but it would be interesting to see someone measure the cellular reception of (a) a naked iPhone 6S, (b) the same iPhone in a, say, Mophie battery case, and (c) the same iPhone in the Smart Battery Case. ↩︎

    +
  2. +
  3. +

    The iPhone 6 and 6S are actually 0.2mm thinner than their corresponding Plus models. That’s sort of crazy. The difference is barely perceptible, but if anything, the 6 and 6S should be a little thicker, not thinner, than the Plus models. ↩︎︎

    +
  4. +
+
+ + + + ]]>
+ ★ The Curious Case of the Curious Case
+ + + + tag:daringfireball.net,2015://1.31795 + 2015-11-14T04:57:52Z + 2015-12-09T03:58:34Z + + John Gruber + http://daringfireball.net/ + +Take away every single iPhone sold — all of them — and Apple’s remaining business for the quarter was almost as big as Microsoft’s, bigger than Google’s, and more than four times the size of Facebook’s.

+]]>
+ This piece by Bryan Clark for TheNextWeb caught my eye last weekend — “We’ve Reached — Maybe Passed — Peak Apple: Why the Narrative Needs to Change”:

+ +
+

Last month, Apple’s latest earnings call announced its “most +successful year ever.” The numbers were reported, the stories +were spun and Wall Street basically anointed Apple the god of +capitalism.

+ +

They’re all wrong.

+
+ +

Apple wasn’t wrong — fiscal 2015 was Apple’s most successful year ever, by the objective measures of both revenue and profit. I suppose you can decide to define “most successful year ever” in terms of something else, like percentage growth or stock price gains, but revenue and profit are pretty fair measures.

+ +

I missed it where “Wall Street basically anointed Apple the god of capitalism”. All I noticed was that Apple’s stock price went up about two percent the day after earnings were announced and has since fallen back to where it was before Q4 earnings were announced.

+ +
+

The actual story, the story we should be telling, involves a +different narrative. Apple is the largest company in the world, +but success is fleeting. While the numbers are impressive, they +don’t come close to painting an accurate picture about how much +trouble Apple is really in.

+ +

Apple’s rise under Steve Jobs was historic. Its fall under Tim +Cook is going to be much slower, more painful.

+
+ +

The fall usually is more painful than the rise. Who writes a sentence like that?

+ +

And if Apple’s fall under Cook is much slower than its rise under Steve Jobs, it’s going to take 20 or 30 years. Apple’s revival was long, slow, and relatively steady.

+ +
+

Apple lives and dies by the iPhone. iPad sales are flat, +iPods are all but irrelevant, and while Mac sales are up, +they’re nowhere close to the workhorse that can continue to +carry Apple should they experience a downturn in iPhone sales. +There is no Plan B.

+ +

One look at the numbers tells a pretty decisive tale.

+ +

Percentage of revenue derived from iPhone sales:

+ +
    +
  • 2012: 46.38%
  • +
  • 2013: 52.07%
  • +
  • 2014: 56.21%
  • +
  • 2015: 62.54%
  • +
+
+ +

This is the part of Clark’s piece that got my attention. It’s a common refrain these days — just search Google for “Apple is too dependent on the iPhone”.

+ +

Clark makes it sound like this is because the rest of Apple’s business is in decline, whereas the truth is that the iPhone continues to grow at an astonishing rate that even Apple’s other successful products can’t match. Is it worrisome that iPad sales continue to decline? Sure. Would it be better for Apple if the iPad were selling in iPhone-esque quantities? Of course. But iPad still sold 9.9 million units and generated $4.3 billion in revenue last quarter.

+ +

Arguing that Apple is in trouble because the iPhone is so popular is like arguing that the ’90s-era Chicago Bulls were in trouble because Michael Jordan was so good. It’s true Jordan couldn’t play forever — and the iPhone won’t be the most profitable product in the world forever. But in the meantime, the Bulls were well-nigh unbeatable, and Apple, for now at least, is unfathomably profitable.1 Just like how it’s better to have loved and lost than never to have loved at all, it’s better to have tremendous success for some period of time than never to have had tremendous success in the first place. Right?

+ +

What I don’t get is why Apple gets singled out for its singular success, but other companies don’t. 92 percent of Google’s revenue last year came from online advertising. And more importantly, I don’t get why Apple’s non-iPhone businesses are so quickly written off only because they’re so much smaller than the iPhone.

+ +

Apple’s total revenue for last quarter was $51.5 billion. The iPhone accounted for $32.2 billion of that, which means Apple’s non-iPhone business generated about $19.3 billion in revenue. All of Microsoft in the same three months: around $21 billion. All of Google: $18.78 billion. Facebook: $4.5 billion. Take away every single iPhone sold — all of them — and Apple’s remaining business for the quarter was almost as big as Microsoft’s, bigger than Google’s, and more than four times the size of Facebook’s. And this is for the July-September quarter, not the October-December holiday quarter in which Apple is strongest.

+ +

Nothing in the world compares to Apple’s iPhone business, including anything else Apple makes. But a multi-billion-per-quarter business here (Mac), a multi-billion-per-quarter business there (iPad), a “Services” division that generates more revenue than Facebook, and an “Other” category (Watch, Apple TV, Beats, iPod) that booked $3 billion in a non-holiday quarter — and it’s clear that Apple’s non-iPhone businesses, combined, amount to a massive enterprise.

+ +

Here’s a Larry Dignan column about whether iPad Pro will make “iPad material to Apple again”:

+ +
+

Apple’s iPad sales are on the borderline of being immaterial to +the company, but some analysts are betting that enterprise sales +of the iPad Pro can turn the product line around. […]

+ +

Nevertheless, the iPad franchise is sucking wind relative to the +iPhone. Apple’s annual report shows the iPad is 10 percent of +overall sales. Once a business falls below 10 percent a company +doesn’t have to break it out. In other words, the iPad could be +lumped into “other” with the Apple Watch and iPod if current +trends continue.

+
+ +

This is a product line that, in and of itself, generated just about exactly the same revenue last quarter as all of Google’s non-advertising business did for the entire fiscal year. But Apple is the company that is considered lopsided and worrisomely dependent upon a single product.

+ +

Name a product introduced in the last five years that has been more successful than the iPad — either in terms of revenue and profit for its maker, or in terms of aggregate hours of daily use and customer satisfaction of its users. I can’t think of one.

+ +

Now consider the Apple Watch. Fast Company called it “a flop” back in July. Here’s a guy on Quora — Jason Lancaster, editor of a website called Accurate Auto Advice — answering, in the affirmative, whether Apple has “already lost the market for self driving cars” (not joking):

+ +
+

Third, Apple may have peaked. Call me a hater, but what reason is +there to assume Apple’s reputation is going to stay where it is? +The watch was a flop, and their only consistent source of success +is the iPhone, as the market for Macs and iPads is drying up (as +it is for all computer hardware companies).

+
+ +

Forget the fact that Mac sales are growing, or that iPad sales, though in decline, remain roughly 10 million per quarter. What I enjoy about this is Lancaster’s having written off the Watch as a flop — he even uses the past tense.

+ +

Here’s what that flop looks like:

+ +
+

Apple has shipped seven million Apple Watches since its +introduction this spring, giving the technology giant a firm lead +in the nascent smartwatch market, according to researcher Canalys.

+ +

That number falls shy of some Wall Street analysts’ expectations +for Apple’s first new device category since 2010. But, for +perspective, consider this: Apple sold more smartwatches from +April through September than all other vendors combined sold over +the past five quarters, Canalys reports.

+
+ +

If we estimate the average selling price for an Apple Watch at $500 (reasonable), that’s $3.5 billion in revenue for the year to date — prior to the holiday quarter that is almost certainly going to be the strongest for watch sales annually.

+ +
+ +

Back to Bryan Clark’s TheNextWeb piece:

+ +
+

Steve Jobs is almost entirely responsible for Apple’s cult-like +following.

+ +

By streamlining the company in an attempt to make it profitable, +the same vision started to make its way through every product +Apple created. Rather than bloated and flashy, Jobs created a +movement of decidedly minimalist devices that required not much +more than an occasional charge and a user that knew where the +power button was.

+ +

Between aesthetically pleasing design, rock-solid hardware, and +software that responded as if it were built for the machine — +not in spite of it — Apple culture became a cult of +Jobs-worshipping consumers willing to buy anything with a +lowercase “i” in front of it.

+
+ +

That never happened. The G4 Cube didn’t sell. iPod Hi-Fi didn’t sell. Those weren’t just non-hit products — they were both products that Steve Jobs himself really liked. I’ve heard that he had a stack of unopened iPod Hi-Fis in his office. Apple products have never been blindly accepted by the mass market — they’ve succeeded on their merits and by meeting actual demand. As I wrote two years ago:

+ +
+

To posit that Apple customers are somehow different, that when +they feel screwed by Apple their response is to go back for more, +is “Cult of Mac” logic — the supposition that most Apple +customers are irrational zealots or trend followers who just +mindlessly buy anything with an Apple logo on it. The truth is the +opposite: Apple’s business is making customers happy, and keeping +them happy. They make products for discriminating people who have +higher standards and less tolerance for design flaws or +problems.

+
+ +

Clark finally tells us what Apple’s biggest problems are:

+ +
+

There are larger issues on the horizon: For example, how does +Apple compete with Windows and Android?

+ +

Both have proven to be amazingly adept in recent years not only at +competing with Apple in form factor, but functionality as well.

+ +

Two companies that are innovating, not searching for identity +outside of a singular product.

+ +

Two companies that are on the way up, not down.

+
+ +

Windows and Android, got it.

+ +
+

The Apple Watch is great, but it’s never going to carry Apple like +the iPhone until it works like one. The watch is undeniably cool, +but it really fails to do anything better than your phone.

+ +

To make matters worse, you have to have an iPhone close by in +order to even use most of its features. Similar Android models are +self-contained and only require an occasional sync.

+ +

The autonomous car project sounds promising, but competing against +Google and Tesla in addition to auto industry giants like Lexus +and Mercedes is an uphill battle full of technology challenges, +government red tape and changing century-old transportation +conventions.

+
+ +

The best I can gather from this mishmash of a conclusion is that Apple Watch should have somehow debuted as a first-generation product that could stand toe-to-toe with the iPhone (which is now in its ninth generation), and that Apple’s car product should already be here. If there were no rumors of an Apple car, we’d be hearing that Apple is going to miss out on the next big industry that is ripe for disruption from the tech industry. But because there are rumors and hints pointing to an Apple car, we’re hearing that cars are too difficult, the established companies too entrenched. Ed Colligan’s line for the ages — “PC guys are not going to just figure this out. They’re not going to just walk in.” — was also about an industry full of longstanding giants, Google, technology challenges, government red tape, and century-old conventions. Minus the “government red tape”, that’s a pretty good description of the watch and home entertainment system industries, too.

+ +

I’m not here to argue the opposite of Colligan — that Apple’s success in these new fields is preordained — because that would be foolish. But it’s just as foolish to argue that Apple can’t succeed — or that anything less than iPhone-sized success in a new endeavor is a failure.

+ +
+
+
    +
  1. +

    The iPhone, however, is unlikely to take a year off in the prime of its career to play baseball. ↩︎

    +
  2. +
+
+ + + + ]]>
+ ★ What Goes Up
+ + + + tag:daringfireball.net,2015://1.31778 + 2015-11-11T13:08:58Z + 2015-11-13T08:05:24Z + + John Gruber + http://daringfireball.net/ + +The future of mass market portable computing involves neither a mouse pointer nor an x86 processor.

+]]>
+ First impressions last a lifetime, goes the adage. You’re going to have to forget your first impressions of the iPad to understand the iPad Pro.

+ +

When Apple introduced the original iPad in 2010, it was explicitly positioned in a new role for a device — somewhere between an iPhone and a MacBook. That seems obvious, but the problem, for the iPad, is that people loved their iPhones and MacBooks. The only way iPad would succeed, Steve Jobs said, was if it were “far better at doing some key things” than either an iPhone or MacBook.

+ +

Apple succeeded. Simply by nature of having a bigger display, the iPad was better than the iPhone for numerous tasks — watching videos or reading long-form text, to name just two. No one would dispute that bigger displays are better for certain tasks — you can prove the productivity gains.

+ +

What made the iPad better than a MacBook, in at least some ways, was more subjective than objective. Objectively, a MacBook was faster, by a large factor, could multitask, and offered a rich library of serious productivity apps. A Mac was, simply put, more powerful than an iPad — both in terms of hardware and software. The iPad had some objective advantages — battery life and the pixel density of its display are two that come to mind.¹

+ +

The trade-offs were obvious. The iPad offered the same conceptual simplicity and intimacy as the iPhone, with the “lean-back” ergonomics of a tablet, at the cost of power — hardware performance and software complexity.

+ +

It was, in short, just a big iPhone. To the eyes of many in the tech industry, “just a big iPhone” was damning. They wanted the iPad to impress in terms of power. To the eyes of tens of millions of users, however, “just a big iPhone” was strong praise. An iPhone with a 10-inch display sounded just great.

+ +

The intervening five years have turned all of this upside down. The iPad Pro now impresses solely by dint of its engineering. Anyone who doesn’t see this is blinded by their established impressions of the first few iPads.

+ +

For the moment, put aside the form factor differences (tablet with optional keyboard vs. hinged clamshell), conceptual differences in iOS and OS X (direct touchscreen manipulation of full-screen apps vs. a mouse pointer and tiled windows) and software differences (simpler iOS apps vs. more complex OS X apps). All those points are worth consideration, but for now, put them aside. Right now, today, the iPad Pro is a peer to the current lineup of MacBooks in terms of computational hardware performance.

+ +

The iPad Pro is without question faster than the new one-port MacBook or the latest MacBook Airs. I’ve looked at several of my favorite benchmarks — Geekbench 3, Mozilla’s Kraken, and Google’s Octane 2 — and the iPad Pro is a race car. It’s only a hair slower than my year-old 13-inch MacBook Pro in single-core measurements. Graphics-wise, testing with GFXBench, it blows my MacBook Pro away. A one-year-old maxed-out MacBook Pro, rivaled by an iPad in performance benchmarks. Just think about that. According to Geekbench’s online results, the iPad Pro is faster in single-core testing than Microsoft’s new Surface Pro 4 with a Core-i5 processor. The Core-i7 version of the Surface Pro 4 isn’t shipping until December — that model will almost certainly test faster than the iPad Pro. But that’s a $1599 machine with an Intel x86 CPU. The iPad Pro starts at $799 and runs an ARM CPU — Apple’s A9X. There is no more trade-off. You don’t have to choose between the performance of x86 and the battery life of ARM.

+ +

We’ve now reached an inflection point. The new MacBook is slower, gets worse battery life, and even its cheapest configuration costs $200 more than the top-of-the-line iPad Pro. The iPad Pro is more powerful, cheaper, has a better display, and gets better battery life. It’s not a clear cut-and-dry win — MacBooks still have more RAM (the iPad Pro, in all configurations, has 4 GB of RAM, although Apple still isn’t publishing this information — MacBook Pros have either 8 or 16 GB), are expandable, and offer far more storage. But at a fundamental level — CPU speed, GPU speed, quality of the display, quality of the sound output, and overall responsiveness of interface — the iPad Pro is a better computer than a MacBook or MacBook Air, and a worthy rival to the far more expensive MacBook Pros.

+ +

The entire x86 computer architecture is living on borrowed time. It’s a dead platform walking. The future belongs to ARM, and Apple’s A-series SoCs are leading the way.

+ +

The A9X didn’t come out of nowhere. Watching Apple’s A-series chips gain on x86 over the past five years, we’ve all been speculating about whether Apple might someday start using ARM chips in MacBooks. As of now, it’s only a question of whether they want to.

+ +

What Apple Means by ‘Pro’

+ +

With the Mac Pro, the “pro” really does stand for “professional”. There’s pretty much no reason for anyone to buy a Mac Pro unless their work is computationally expensive. There aren’t many people left whose work is slowed down regularly by the performance of their computer. The Mac Pro is aimed at that market. (That said, a higher-end iMac will outperform a Mac Pro in many tasks that aren’t well-suited to multicore parallel computing. The Mac Pro is due for an update.)

+ +

With the MacBook Pro, on the other hand, “pro” isn’t really short for “professional”. It’s more like “deluxe” — a signifier that it’s a higher-end product than its non-pro siblings. Faster, better, and accordingly higher-priced. A MacBook Pro with 1 TB of SSD storage is indeed a terrific portable computer for “professional” use by, say, a photographer or film editor or software developer — people who truly stretch the performance of any computer today, portable or otherwise. But a decked-out MacBook Pro is also a terrific and perfectly reasonable choice for anyone who can simply afford one. MacBook Airs don’t have retina displays (and likely will never be upgraded to offer them), and the one-port MacBook is relatively slow.

+ +

The iPad Pro is “pro” in the way MacBook Pros are. Genuine professionals with a professional need — visual artists in particular — are going to line up for them. But it’s also a perfectly reasonable choice for casual iPad users who just want a bigger display, louder (and now stereo) speakers, and faster performance.

+ +

Anyone tying themselves in knots looking for a specific target audience for the iPad Pro is going about it the wrong way. There is no single target audience. Is the iPad Pro meant for office workers in the enterprise? Professional artists creating content? Casual users playing games, watching movies, and reading? The answer is simply “Yes”.

+ +

Smart Keyboard and Converting to a Laptop Form Factor

+ +

So unlike the original iPad of 2010, which carved out new territory between that of an iPhone and MacBook, the iPad Pro is clearly an alternative to a MacBook. I’m sure someone out there will carry both a MacBook (of any sort) and an iPad Pro while traveling, but I don’t really see the sense of that. The iPad Mini makes perfect sense as a travel companion to a MacBook. The iPad Air does too — especially for someone who watches a lot of video or prefers larger type while reading. But the iPad Pro stands as an alternative to a MacBook. If you want to carry a MacBook, you want a smaller, lighter iPad as a companion, and you don’t need a keyboard for it. If you want to carry an iPad Pro, you might as well get the Smart Keyboard cover and leave the MacBook at home.

+ +

The trade-offs are varied. If you don’t type much, or don’t mind using the on-screen keyboard when you do, you’re probably already sold on the iPad-as-primary-portable-computer lifestyle. If you do type a lot and want a hardware keyboard, the appeal of the iPad Pro is going to largely hinge on your affinity for the Smart Keyboard.

+ +

I’ve been using this iPad Pro review unit (128 GB, with cellular — top of the line kit, natch) for eight days, and most of that time I’ve had the Smart Keyboard attached. For just plain typing, it’s not that bad — I’ve written this entire review using it, Federico Viticci-style. I went into it thinking that my biggest complaint would be the keys themselves — I like my keyboards clicky, with a lot of travel. But I adjusted to it pretty quickly, and I kind of like the way it feels, as a tactile surface. It almost feels like canvas.

+ +

My complaints and frustrations are more from the software, both iOS 9.1 itself and individual apps, both from Apple and third-party developers. Trying to use the iPad Pro as a laptop with the Smart Keyboard exposes the seams of an OS that was clearly designed for touchscreen use first. These seams aren’t new — I’m sure anyone who has tried using an iPad of any sort with a paired Bluetooth keyboard has run into the same things. This is simply the first time I’ve tried using an iPad with a hardware keyboard for an extended period for large amounts of work.

+ +

I almost wrote “for large amounts of writing” in the preceding paragraph, but the problems with an iPad and a hardware keyboard are more than about typing. A large part of my work is reading, and with a laptop, the keyboard is a big part of the reading experience. In fact, with the iPad Pro, the keyboard is even more important than it is on a MacBook — and today, it falls short.

+ +

Here’s what I mean. First, when the iPad Pro is open with the keyboard attached, holding your arm up to touch the screen for anything longer than a moment or two is ergonomically uncomfortable. Apple has stated for years that this is why they don’t make the displays on MacBooks or iMacs touchscreens (that, combined with the relatively tiny click targets of Mac OS X, which are designed for very precise mice and trackpads, not imprecise finger tips). Scrolling through a long document using the iPad Pro touch screen is uncomfortable when it’s in laptop position. Going through a slew of new emails, likewise. In laptop mode, I want to use the keyboard for these things — and in most cases, because of bugs and/or software limitations, I can’t. That the keyboard falls short in these cases is even worse on iPad than it would be on a MacBook, because a MacBook has a trackpad. The point is, if my fingers are on the keyboard, I don’t want to move my hands. With a trackpad, I don’t have to. With the iPad Pro, I do.

+ +

It’s an ancient (meaning dating back to the Classic era) Mac convention that in a read-only scrolling view, you can use the space bar to page down. When your eyes get to the bottom of the display, you can just hit space and the view should scroll to show the next screen full of content — with the last line or two of the previous screen now repeated at the top of the new screen to provide context as your eyes move from the bottom to the top of the display. This works almost everywhere on OS X, and anywhere it doesn’t work should be considered a bug.

+ +

On iOS 9.1, Safari tries to support this, but it is dreadfully buggy. Instead of paging down just less than one screen-height of content, it pages down about 1.5 screen-heights of content. It literally scrolls right past huge amounts of content, rendering the feature completely unusable.

+ +

Here’s a sample page I’ve created to illustrate. It’s just a simple text file with 1,000 lines, numbered in order. When I view that on my MacBook Pro, I see lines 1–45 (and half of line 46). When I hit space to page, the view scrolls and I now see lines 44–89. Hit space again and the view scrolls to show lines 88–132.

+ +

On iPad Pro, I see lines 1–49 initially. But when I hit space to page down, the view scrolls to show me lines 75–123. Lines 50–74 are completely skipped past. It’s not even just a line or two — it’s huge chunks of text. This happens in all web pages in Safari on iOS 9.1, and it is not specific to the iPad Pro and Smart Keyboard. I see the exact same behavior on any iPad with a paired Bluetooth keyboard.

+ +

Mail is another app in which, on my Macs, I depend heavily on the keyboard for scrolling and selection. On iPad, Mail does let you move from message to message using the keyboard (⌘↓ and ⌘↑), but it doesn’t support scrolling the actual message content — the space bar does nothing, and the Smart Keyboard doesn’t have a proper Page Down key.

+ +

The space bar doesn’t work as a Play/Pause toggle for audio or video, either. I think it should.

+ +

I don’t think it’s inherently problematic that iOS has no conceptual support for a mouse pointer, and thus can’t work with any sort of trackpad. But, given this constraint, good support for navigating as much of the UI as possible using the keyboard is more important on the iPad than it is on the Mac. But iOS’s support for navigating using the keyboard is worse.

+ +

Another problem: when editing a long document, if you use the arrow keys to move the insertion point above the first line on screen or below the last line on screen, the insertion point just disappears off screen. The view doesn’t scroll to keep the insertion point visible, which is clearly what should happen (and does happen on OS X). Surely iOS will work this way eventually, but right now it still shows its roots as a touchscreen OS where a hardware keyboard is a decided second-class citizen.

+ +

All is not lost, however. ⌘-Tab works for app switching just like it does on the Mac. Tap it and release and you switch to the most-recently used app. Tap it and keep holding down ⌘ and you get a visual switcher showing the 10 most-recently-used apps. (Again, this works with any hardware keyboard connected to any iPad — it’s just that this has been the first time it’s been relevant to me, personally.) The Smart Keyboard lacks a Home button, but there is a system-wide shortcut that maps ⌘-Shift-H to “Home”. Not bad, but once you’re at the iOS home screen, there’s not much you can do without touching the screen. For a few days, I sort of wished that I could use the arrow keys to navigate the home screen, with the selected app icon popping “up” like in the “focus” UI of the new Apple TV. But that idea, I suspect, is too far afield from the regular touchscreen-based UI of the iOS home screen. My keyboard idea requires a select-then-act two-stage model — the regular touch-based launcher is single-stage: just tap.

+ +

But then I realized that the problem I wanted to solve wasn’t that I wanted the home screen to be keyboard-navigable. The problem was that I wanted to use the keyboard to launch apps that weren’t listed in the ⌘-Tab switcher. To do that on iOS without a hardware keyboard, you go home, then tap the app. With a keyboard, though, you can do it, just in a different way.

+ +

Hit ⌘-Space system wide, and you’ll be taken to the home screen’s system-wide “Quick Search”. It’s like the iOS equivalent of Spotlight. Start typing the name of the app you want to launch, and there it is.

+ +

But go ahead and play a sad trombone wah-wah here, because at this point, you still have to pick your arm up and touch the screen to launch the app. You can also use Quick Search for starting a web search in Safari, or anything else. But you can’t use the keyboard arrow keys to navigate the list of results. (Another problem with Quick Search using the keyboard: you have to wait a second or so for the Quick Search text field to accept input. I’m pretty sure it’s because we’re waiting for the animation to complete — first to show the home screen, then to jump to Quick Search. So if you type ⌘-Space and immediately begin typing what you’re looking for, the first few characters you type are lost. The user should never have to wait for the computer, especially if it’s just for an animation. Any Mac user with muscle memory trained by LaunchBar, Alfred, Quicksilver, or even Spotlight is going to find this enforced delay on iOS maddening.)

+ +

This lack of keyboard support is prevalent system-wide. In Messages, if you start a new conversation and type the partial name of a contact, you can’t select from the list of matches using arrow keys or auto-complete the name you’ve partially typed using Tab. You’ve got to — you guessed it — reach up and touch the screen. You can use the arrow keys to select from a list of suggestions in the recipients fields in Mail, however, and arrow keys also work for selecting from the list of suggestions in the Safari location field.

+ +

The bottom line is that the potential of the iPad Pro as a laptop is tremendous. The keyboard is just fine for typing, and the magnetic connection between the iPad Pro and the keyboard is surprisingly sturdy. You can absolutely use it as a literal laptop without any worry that the iPad Pro is going to fall off the Smart Keyboard. I even like the 4:3 aspect ratio — it shows more lines of text when reading than my 13-inch MacBook Pro. It also occupies a smaller footprint than an open MacBook Pro, meaning it should fit better on the seatback tray of an airplane. But the lack of pervasive support for keyboard-based UI navigation in iOS is a problem for anyone with longstanding Mac keyboard shortcuts ingrained in their muscle memory.

+ +

As an actual cover, the Smart Keyboard does feel thick, and when closed, it bothers me a little that it’s thicker on the outer two thirds (where the keyboard is folded under) than the inner third. I wouldn’t recommend the Smart Keyboard for anyone who doesn’t plan to actually use the keyboard quite a bit. But if you do plan on using the keyboard frequently, the trade-off in thickness (compared to the non-keyboard Smart Cover) is well worth it.

+ +

(It occurs to me that for many people, the Smart Keyboard might best be thought of not as a thick cover, but as a thin very portable desktop docking station.)

+ +

Keyboard Bugs

+ +

I experienced some flakiness with the keyboard throughout the week. Sometimes, system-wide keyboard shortcuts would stop working: ⌘-Tab, ⌘-Space, and ⌘-Shift-H. Typing within apps still worked, and keyboard shortcuts within any given app still worked, but the system-wide shortcuts inexplicably stopped working.

+ +

Less frequently, I’ve seen the opposite problem: the system-wide keyboard shortcuts work, but keyboard shortcuts within any given app stop working. (iOS 9 has a very clever feature, by the way: press and hold the ⌘ key and you’ll see a HUD pop-up displaying all keyboard shortcuts available in the current context. This makes keyboard shortcuts more discoverable than they are on the Mac, where they’re spread across multiple menus in the menu bar.)

+ +

In either case, I’ve been able to fix these keyboard problems by detaching and re-attaching the iPad from the Smart Keyboard. I don’t know if it’s a bug in iOS 9.1 or a faulty Smart Keyboard. (Apple has shipped me a second Smart Keyboard to test, but it won’t arrive until later in the day, after this review has been published. I’ll update it after the replacement arrives.)

+ +

Apple Pencil

+ +

It’s about precision: accuracy where you touch (Apple claims sub-pixel precision on screen), accuracy regarding pressure, and low latency regarding what you see on screen. I am not an illustrator, but I do know my own signature. My signature never looks like my actual signature when I have to sign electronically on a point-of-sale terminal. Usually it doesn’t even look close. On iPad Pro with Apple Pencil, it looks exactly like my signature when I sign with paper and ink. My handwriting looks like my handwriting, period (for better or for worse).

+ +

All previous iOS devices have touchscreens designed for input from one source: fingertips. Fingertips are relatively fat and capacitive. The relatively fat size and imprecise location of a finger on screen is why tap targets are relatively larger and more spaced apart on iOS than OS X. This is also why third-party styluses for iOS devices have broad tips made of capacitive rubber — they’re more or less fake fingertips. The capacitive touchscreens on iPhones and (non-Pro) iPads aren’t designed for “fine tips”.

+ +

Apple has done a few things regarding sampling the screen for input with Apple Pencil. First, there is something new in the display itself — something in the layer between the glass surface and the LCD display, I think. Or perhaps it’s under the LCD? Apple alludes to it in the Jony Ive-narrated video on the Apple Pencil web page, but they’re not really talking about it in detail.

+ +

For capacitive (finger) touch, the iPad Pro samples at twice the rate of previous iPads — 120 times per second instead of 60. With the Pencil, though, the iPad Pro samples 240 times per second. The way the Pencil works requires cooperation with the display, and so there’s no way this Pencil could be made to work with existing iPads. The Pencil is not iPad Pro-exclusive out of product marketing spite — it’s exclusive to the Pro because the two were engineered in coordination with each other. And if Apple had designed the Pencil differently, to allow it to work with existing iPads, there’s no way it could have had this level of accuracy, because the tip would have needed to be broader and capacitive. (The Pencil’s tip is not capacitive at all — it doesn’t register as a touch at all on any other iOS device.)

+ +

My guess is we’ll start to see Pencil support in future iOS devices in addition to the iPad Pro, starting with the iPad Air 3.

+ +

Because the Pencil is round-barreled and has no clip on the cap, I was worried that it would roll around (and eventually, off) a table top. But it’s actually weighted inside, sort of like a Weeble Wobble, so unless it’s on a sloped surface, it won’t roll more than an inch or so before settling in place. In hand, I can’t tell that it’s weighted like this.

+ +

I think most people who buy an iPad Pro are going to want a Smart Keyboard. The Apple Pencil is the more technically remarkable peripheral, but I suspect it’ll prove useful to far fewer people. Sketching apps like 53’s Paper and Apple’s own built-in Notes app certainly have appeal and utility to people who aren’t artists, but I suspect a lot of Apple Pencils are going to be bought out of curiosity and then go largely unused.

+ +

For actual illustrators and artists, however, the Pencil and iPad Pro seem poised to be a career/industry-changing combination. What has been largely abstract — drawing using a device over here, looking at the results on a screen over there — can now be direct.

+ +

Miscellaneous

+ +
    +
  • Weight: The iPad Pro certainly feels heavier than recent iPads, but only in a way that’s commensurate with its increased size. It’s not too heavy.

  • +
  • Audio: The speakers are surprisingly loud. Apple told me the iPad Pro produces three times the audio volume of the iPad Air, and that certainly matches my experience. If you use your iPad as a small TV, the audio improvements might be more meaningful than the bigger display. The four-speaker stereo system is also very clever — no matter which way you rotate the iPad Pro, the top two speakers are for treble and the bottom two for bass.

  • +
  • Snap: Speaking of audio, if there’s a downside to the snug connection between the iPad Pro and the Smart Keyboard, it’s that the magnetic connection makes a rather loud snap when you connect or disconnect it. I can imagine some scenarios — in bed with a sleeping spouse, say — where this might be a problem.

  • +
  • Size classes: I think even Apple’s own apps are still figuring out how best to arrange layouts on this larger display. For example, in Mail, when the iPad Pro is in portrait, it only shows one column at a time. I think there’s clearly enough room horizontally, even in portrait, for a two-pane layout (narrow list of messages on left, wide message detail on right). The iPad Pro in portrait is as wide as the iPad Air in landscape — and the iPad Air in landscape uses two panes for Mail. Third-party developers are going to want to adjust their apps after they get a feel for what it’s like to use the iPad Pro for real.

  • +
  • Battery life: Simply outstanding. I didn’t even plug it in once between Monday and Friday, and it still had plenty of charge left. I’ve been using it for eight continuous hours as I type this sentence, and it still has more than a 50 percent charge remaining.

  • +
  • Missing apps: It’s been like this ever since the original iPad, but it still strikes me as odd that the iPad version of iOS lacks the Calculator, Weather, and Stocks apps. The Mac doesn’t have “apps” for Weather or Stocks, but it does have widgets for them in Notification Center. And it seems downright crazy for a computer not to have a built-in means for doing arithmetic. (Although you can do some arithmetic using Quick Search.)

  • +
  • Touch, Don’t Touch: For the past week I’ve really only used two computers. The iMac on my desk, and this iPad Pro. Today, though, I used my MacBook Pro while the iPad Pro was running benchmarks. And within a few minutes, I did something I have never once done before: I reached up and tried to touch something on the display. Ten minutes later I did it again. I point this out not to argue that I think MacBooks should have touch screens, but simply as an observation that even a lifelong Mac user can quickly get accustomed to the iPad Pro as a laptop.

  • +
+ +

Conclusion

+ +

From a hardware perspective, the iPad Pro strikes me as a seminal device. It runs faster than the Intel x86-based MacBooks, gets better battery life, and costs significantly less. And it has a better display with significantly more pixels than even a 15-inch MacBook Pro.

+ +

Software-wise, support for the Smart Keyboard needs to get even smarter — but I’d be shocked if it doesn’t. For me, the iPad Pro marks the turning point where iPads are no longer merely lightweight (both physically and conceptually) alternatives to MacBooks for use in simple scenarios, to where MacBooks will now start being seen as heavyweight alternatives to iPads for complex scenarios.²

+ +

Is it a MacBook replacement for me, personally? No. For you? Maybe. For many people? Yes.

+ +

It brings me no joy to observe this, but the future of mass market portable computing involves neither a mouse pointer nor an x86 processor.

+ +
+
+
    +
  1. +

    It’s kind of funny to think of a 2010 iPad with its 133 PPI display as “high resolution” — such a display looks comically fuzzy by today’s standards. But at the time it was a noticeably sharper display than what was in the MacBooks of the day — a 2009 13-inch MacBook Pro had a display with 113 PPI resolution. ↩︎

    +
  2. +
  3. +

    iOS 9’s split-screen multitasking really shines on the iPad Pro. I’ve found it useful on my iPad Air, but it’s downright natural on the iPad Pro. ↩︎︎

    +
  4. + +
+
+ + + + ]]>
+ ★ The iPad Pro
diff --git a/Modules/Parser/Tests/ParserTests/Resources/Subs.opml b/Modules/Parser/Tests/OPMLParserTests/Resources/Subs.opml similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/Subs.opml rename to Modules/Parser/Tests/OPMLParserTests/Resources/Subs.opml diff --git a/Modules/Parser/Tests/ParserTests/Resources/SubsNoTitleAttributes.opml b/Modules/Parser/Tests/OPMLParserTests/Resources/SubsNoTitleAttributes.opml similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/SubsNoTitleAttributes.opml rename to Modules/Parser/Tests/OPMLParserTests/Resources/SubsNoTitleAttributes.opml From 6966b8a7aa103323a43c21ac80fd19c94d16e1b4 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 22:42:21 -0700 Subject: [PATCH 22/88] Add baseline for performance measurement. --- ...8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist | 22 +++++++++++++ .../OPMLParserTests.xcbaseline/Info.plist | 33 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/Info.plist diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist new file mode 100644 index 000000000..fceb58558 --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist @@ -0,0 +1,22 @@ + + + + + classNames + + OPMLTests + + testOPMLParsingPerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.001998 + baselineIntegrationDisplayName + Local Baseline + + + + + + diff --git 
a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/Info.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/Info.plist new file mode 100644 index 000000000..c9ba9f70a --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/Info.plist @@ -0,0 +1,33 @@ + + + + + runDestinationsByUUID + + 8F8BFCF6-AACD-45D7-B626-1B58CDE0924D + + localComputer + + busSpeedInMHz + 0 + cpuCount + 1 + cpuKind + Apple M1 Max + cpuSpeedInMHz + 0 + logicalCPUCoresPerPackage + 10 + modelCode + Mac13,1 + physicalCPUCoresPerPackage + 10 + platformIdentifier + com.apple.platform.macosx + + targetArchitecture + arm64e + + + + From 13b467186c0e175112b4492dbccbc81808998742 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 26 Aug 2024 23:02:22 -0700 Subject: [PATCH 23/88] Fix bug in SAXEqualTags. --- Modules/Parser/Sources/OPMLParser/OPMLParser.swift | 1 + Modules/Parser/Sources/SAX/SAXParser.swift | 2 +- Modules/Parser/Sources/SAX/SAXUtilities.swift | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift index 578610468..d7f6b1520 100644 --- a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift +++ b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift @@ -103,6 +103,7 @@ extension OPMLParser: SAXParserDelegate { if let item = currentItem as? 
OPMLDocument { item.title = saxParser.currentStringWithTrimmedWhitespace } + saxParser.endStoringCharacters() return } diff --git a/Modules/Parser/Sources/SAX/SAXParser.swift b/Modules/Parser/Sources/SAX/SAXParser.swift index 5d8c6244e..cc0ec9943 100644 --- a/Modules/Parser/Sources/SAX/SAXParser.swift +++ b/Modules/Parser/Sources/SAX/SAXParser.swift @@ -85,7 +85,7 @@ public final class SAXParser { characters.count = 0 } - func endStoringCharacters() { + public func endStoringCharacters() { storingCharacters = false characters.count = 0 diff --git a/Modules/Parser/Sources/SAX/SAXUtilities.swift b/Modules/Parser/Sources/SAX/SAXUtilities.swift index cdc2fdc27..dccda4e9c 100644 --- a/Modules/Parser/Sources/SAX/SAXUtilities.swift +++ b/Modules/Parser/Sources/SAX/SAXUtilities.swift @@ -12,9 +12,9 @@ public func SAXEqualTags(_ localName: XMLPointer, _ tag: ContiguousArray) return tag.withUnsafeBufferPointer { bufferPointer in - let tagCount = tag.count + let tagCount = tag.count // includes 0 terminator - for i in 0..) } // localName might actually be longer — make sure it’s the same length as tag. - return localName[tagCount] == 0 + return localName[tagCount - 1] == 0 } } From 22ab6af28da79c6f93fd53685e849baf9bbec50f Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Tue, 27 Aug 2024 09:16:25 -0700 Subject: [PATCH 24/88] Update OPML measurement baseline. 
--- .../8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist index fceb58558..c392eb3bf 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/OPMLParserTests.xcbaseline/8F8BFCF6-AACD-45D7-B626-1B58CDE0924D.plist @@ -11,7 +11,7 @@ com.apple.XCTPerformanceMetric_WallClockTime baselineAverage - 0.001998 + 0.002870 baselineIntegrationDisplayName Local Baseline From ec2c294fab2051e5454dbddfc2e83d6c904d8be4 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Tue, 27 Aug 2024 09:16:41 -0700 Subject: [PATCH 25/88] Fix OPMLParser bug. Tests now pass. --- Modules/Parser/Sources/OPMLParser/OPMLParser.swift | 2 +- Modules/Parser/Tests/OPMLParserTests/OPMLTests.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift index d7f6b1520..b7f44828f 100644 --- a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift +++ b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift @@ -73,7 +73,7 @@ private extension OPMLParser { return } - _ = itemStack.dropLast() + itemStack.removeLast() } } diff --git a/Modules/Parser/Tests/OPMLParserTests/OPMLTests.swift b/Modules/Parser/Tests/OPMLParserTests/OPMLTests.swift index a1b5dea8b..b8c967dd3 100644 --- a/Modules/Parser/Tests/OPMLParserTests/OPMLTests.swift +++ b/Modules/Parser/Tests/OPMLParserTests/OPMLTests.swift @@ -16,7 +16,7 @@ class OPMLTests: XCTestCase { func testOPMLParsingPerformance() { - // 0.002 sec on my 2012 iMac. 
+ // 0.003 sec on my M1 Mac Studio 2022 self.measure { let _ = OPMLParser.document(with: self.subsData) } From 61825a6d8899b13e9b7a4a400064cac5d70a243e Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Tue, 27 Aug 2024 20:46:11 -0700 Subject: [PATCH 26/88] Create FeedParser target to replace Parser. --- ...ests.xcscheme => FeedParserTests.xcscheme} | 6 +- .../xcschemes/Parser-Package.xcscheme | 10 +++ .../xcshareddata/xcschemes/Parser.xcscheme | 79 ------------------- Modules/Parser/Package.swift | 10 +-- .../Feeds/FeedParser.swift | 0 .../Feeds/FeedParserError.swift | 0 .../Feeds/FeedType.swift | 0 .../Feeds/JSON/JSONFeedParser.swift | 0 .../Feeds/JSON/RSSInJSONParser.swift | 0 .../Feeds/ParsedAttachment.swift | 0 .../Feeds/ParsedAuthor.swift | 0 .../Feeds/ParsedFeed.swift | 2 +- .../Feeds/ParsedHub.swift | 0 .../Feeds/ParsedItem.swift | 0 .../Feeds/XML/AtomParser.swift | 0 .../Feeds/XML/RSParsedFeedTransformer.swift | 0 .../FeedParser/Feeds/XML/RSSParser.swift | 22 ++++++ .../JSON/JSONTypes.swift | 0 .../JSON/JSONUtilities.swift | 0 .../Sources/Parser/Feeds/XML/RSSParser.swift | 28 ------- .../AtomParserTests.swift | 0 .../EntityDecodingTests.swift | 0 .../FeedParserTypeTests.swift | 0 .../HTMLLinkTests.swift | 0 .../HTMLMetadataTests.swift | 0 .../Info.plist | 0 .../JSONFeedParserTests.swift | 0 .../ParserTests.swift | 0 .../RSDateParserTests.swift | 0 .../RSSInJSONParserTests.swift | 0 .../RSSParserTests.swift | 0 .../Resources/3960.json | 0 .../Resources/489.rss | 0 .../Resources/4fsodonline.atom | 0 .../Resources/DaringFireball.atom | 0 .../Resources/DaringFireball.html | 0 .../Resources/DaringFireball.json | 0 .../Resources/DaringFireball.rss | 0 .../Resources/EMarley.rss | 0 .../Resources/KatieFloyd.rss | 0 .../Resources/OneFootTsunami.atom | 0 .../Resources/ScriptingNews.json | 0 .../Resources/YouTubeTheVolvoRocks.html | 0 .../Resources/aktuality.rss | 0 .../Resources/allthis-partial.json | 0 .../Resources/allthis.atom | 0 .../Resources/allthis.json | 0 
.../Resources/atp.rss | 0 .../Resources/authors.json | 0 .../Resources/bio.rdf | 0 .../Resources/cloudblog.rss | 0 .../Resources/coco.html | 0 .../Resources/curt.json | 0 .../Resources/dcrainmaker.xml | 0 .../Resources/donthitsave.xml | 0 .../Resources/expertopinionent.atom | 0 .../Resources/furbo.html | 0 .../Resources/inessential.html | 0 .../Resources/inessential.json | 0 .../Resources/kc0011.rss | 0 .../Resources/livemint.xml | 0 .../Resources/macworld.rss | 0 .../Resources/manton.rss | 0 .../Resources/monkeydom.rss | 0 .../Resources/natasha.xml | 0 .../Resources/phpxml.rss | 0 .../Resources/pxlnv.json | 0 .../Resources/rose.json | 0 .../Resources/russcox.atom | 0 .../Resources/scriptingNews.rss | 0 .../Resources/sixcolors.html | 0 .../Resources/theomnishow.rss | 0 72 files changed, 41 insertions(+), 116 deletions(-) rename Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/{ParserTests.xcscheme => FeedParserTests.xcscheme} (91%) delete mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser.xcscheme rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/FeedParser.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/FeedParserError.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/FeedType.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/JSON/JSONFeedParser.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/JSON/RSSInJSONParser.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/ParsedAttachment.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/ParsedAuthor.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/ParsedFeed.swift (97%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/ParsedHub.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/ParsedItem.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/Feeds/XML/AtomParser.swift (100%) rename 
Modules/Parser/Sources/{Parser => FeedParser}/Feeds/XML/RSParsedFeedTransformer.swift (100%) create mode 100644 Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift rename Modules/Parser/Sources/{Parser => FeedParser}/JSON/JSONTypes.swift (100%) rename Modules/Parser/Sources/{Parser => FeedParser}/JSON/JSONUtilities.swift (100%) delete mode 100644 Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/AtomParserTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/EntityDecodingTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/FeedParserTypeTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/HTMLLinkTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/HTMLMetadataTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Info.plist (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/JSONFeedParserTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/ParserTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/RSDateParserTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/RSSInJSONParserTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/RSSParserTests.swift (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/3960.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/489.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/4fsodonline.atom (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/DaringFireball.atom (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/DaringFireball.html (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/DaringFireball.json (100%) rename 
Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/DaringFireball.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/EMarley.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/KatieFloyd.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/OneFootTsunami.atom (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/ScriptingNews.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/YouTubeTheVolvoRocks.html (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/aktuality.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/allthis-partial.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/allthis.atom (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/allthis.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/atp.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/authors.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/bio.rdf (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/cloudblog.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/coco.html (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/curt.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/dcrainmaker.xml (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/donthitsave.xml (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/expertopinionent.atom (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/furbo.html (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/inessential.html (100%) rename Modules/Parser/Tests/{ParserTests => 
FeedParserTests}/Resources/inessential.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/kc0011.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/livemint.xml (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/macworld.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/manton.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/monkeydom.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/natasha.xml (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/phpxml.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/pxlnv.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/rose.json (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/russcox.atom (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/scriptingNews.rss (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/sixcolors.html (100%) rename Modules/Parser/Tests/{ParserTests => FeedParserTests}/Resources/theomnishow.rss (100%) diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/ParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/FeedParserTests.xcscheme similarity index 91% rename from Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/ParserTests.xcscheme rename to Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/FeedParserTests.xcscheme index f202d73f0..7f09db509 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/ParserTests.xcscheme +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/FeedParserTests.xcscheme @@ -18,9 +18,9 @@ skipped = "NO"> diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme 
index 1dcb9ee6e..e1ee0e250 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme @@ -78,6 +78,16 @@ ReferencedContainer = "container:"> + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index 745b19884..1f9b0ee07 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -9,9 +9,9 @@ let package = Package( products: [ // Products define the executables and libraries a package produces, and make them visible to other packages. .library( - name: "Parser", + name: "FeedParser", type: .dynamic, - targets: ["Parser"]), + targets: ["FeedParser"]), .library( name: "SAX", type: .dynamic, @@ -36,7 +36,7 @@ let package = Package( .enableExperimentalFeature("StrictConcurrency") ]), .target( - name: "Parser", + name: "FeedParser", dependencies: [ "SAX" ], @@ -50,8 +50,8 @@ let package = Package( .enableExperimentalFeature("StrictConcurrency") ]), .testTarget( - name: "ParserTests", - dependencies: ["Parser"], + name: "FeedParserTests", + dependencies: ["FeedParser"], exclude: ["Info.plist"], resources: [.copy("Resources")]), .testTarget( diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/FeedParser.swift rename to Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedParserError.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParserError.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/FeedParserError.swift rename to Modules/Parser/Sources/FeedParser/Feeds/FeedParserError.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift similarity index 100% 
rename from Modules/Parser/Sources/Parser/Feeds/FeedType.swift rename to Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/JSON/JSONFeedParser.swift rename to Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/JSON/RSSInJSONParser.swift rename to Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedAttachment.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/ParsedAttachment.swift rename to Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/ParsedAuthor.swift rename to Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedFeed.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift similarity index 97% rename from Modules/Parser/Sources/Parser/Feeds/ParsedFeed.swift rename to Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift index 639d86ab4..6aef1b5c5 100644 --- a/Modules/Parser/Sources/Parser/Feeds/ParsedFeed.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedFeed: Sendable { +public class ParsedFeed: Sendable { public let type: FeedType public let title: String? 
diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedHub.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/ParsedHub.swift rename to Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/ParsedItem.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/ParsedItem.swift rename to Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/XML/AtomParser.swift rename to Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift similarity index 100% rename from Modules/Parser/Sources/Parser/Feeds/XML/RSParsedFeedTransformer.swift rename to Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift new file mode 100644 index 000000000..6a643d516 --- /dev/null +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -0,0 +1,22 @@ +// +// RSSParser.swift +// RSParser +// +// Created by Brent Simmons on 6/25/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation +import SAX + +public final class RSSParser { + + private var parseFeed: ParsedFeed? + + public static func parsedFeed(with parserData: ParserData) -> ParsedFeed? 
{ + + let parser = RSSParser(parserData) + parser.parse() + return parser.parsedFeed + } +} diff --git a/Modules/Parser/Sources/Parser/JSON/JSONTypes.swift b/Modules/Parser/Sources/FeedParser/JSON/JSONTypes.swift similarity index 100% rename from Modules/Parser/Sources/Parser/JSON/JSONTypes.swift rename to Modules/Parser/Sources/FeedParser/JSON/JSONTypes.swift diff --git a/Modules/Parser/Sources/Parser/JSON/JSONUtilities.swift b/Modules/Parser/Sources/FeedParser/JSON/JSONUtilities.swift similarity index 100% rename from Modules/Parser/Sources/Parser/JSON/JSONUtilities.swift rename to Modules/Parser/Sources/FeedParser/JSON/JSONUtilities.swift diff --git a/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift deleted file mode 100644 index 885790e16..000000000 --- a/Modules/Parser/Sources/Parser/Feeds/XML/RSSParser.swift +++ /dev/null @@ -1,28 +0,0 @@ -// -// RSSParser.swift -// RSParser -// -// Created by Brent Simmons on 6/25/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -import Foundation - -// RSSParser wraps the Objective-C RSRSSParser. -// -// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc. -// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates -// the same things that JSONFeedParser and RSSInJSONParser create. -// -// In general, you should see FeedParser.swift for all your feed-parsing needs. - -public struct RSSParser { - - public static func parse(_ parserData: ParserData) -> ParsedFeed? 
{ - - if let rsParsedFeed = RSRSSParser.parseFeed(with: parserData) { - return RSParsedFeedTransformer.parsedFeed(rsParsedFeed) - } - return nil - } -} diff --git a/Modules/Parser/Tests/ParserTests/AtomParserTests.swift b/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/AtomParserTests.swift rename to Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift diff --git a/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift rename to Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift diff --git a/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift rename to Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift diff --git a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift rename to Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift diff --git a/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift rename to Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift diff --git a/Modules/Parser/Tests/ParserTests/Info.plist b/Modules/Parser/Tests/FeedParserTests/Info.plist similarity index 100% rename from Modules/Parser/Tests/ParserTests/Info.plist rename to Modules/Parser/Tests/FeedParserTests/Info.plist diff --git a/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift b/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift 
similarity index 100% rename from Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift rename to Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift diff --git a/Modules/Parser/Tests/ParserTests/ParserTests.swift b/Modules/Parser/Tests/FeedParserTests/ParserTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/ParserTests.swift rename to Modules/Parser/Tests/FeedParserTests/ParserTests.swift diff --git a/Modules/Parser/Tests/ParserTests/RSDateParserTests.swift b/Modules/Parser/Tests/FeedParserTests/RSDateParserTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/RSDateParserTests.swift rename to Modules/Parser/Tests/FeedParserTests/RSDateParserTests.swift diff --git a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift rename to Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift diff --git a/Modules/Parser/Tests/ParserTests/RSSParserTests.swift b/Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift similarity index 100% rename from Modules/Parser/Tests/ParserTests/RSSParserTests.swift rename to Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift diff --git a/Modules/Parser/Tests/ParserTests/Resources/3960.json b/Modules/Parser/Tests/FeedParserTests/Resources/3960.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/3960.json rename to Modules/Parser/Tests/FeedParserTests/Resources/3960.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/489.rss b/Modules/Parser/Tests/FeedParserTests/Resources/489.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/489.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/489.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/4fsodonline.atom 
b/Modules/Parser/Tests/FeedParserTests/Resources/4fsodonline.atom similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/4fsodonline.atom rename to Modules/Parser/Tests/FeedParserTests/Resources/4fsodonline.atom diff --git a/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.atom b/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.atom similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/DaringFireball.atom rename to Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.atom diff --git a/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.html b/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.html similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/DaringFireball.html rename to Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.html diff --git a/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.json b/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/DaringFireball.json rename to Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.rss b/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/DaringFireball.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/EMarley.rss b/Modules/Parser/Tests/FeedParserTests/Resources/EMarley.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/EMarley.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/EMarley.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/KatieFloyd.rss b/Modules/Parser/Tests/FeedParserTests/Resources/KatieFloyd.rss similarity index 100% rename from 
Modules/Parser/Tests/ParserTests/Resources/KatieFloyd.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/KatieFloyd.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/OneFootTsunami.atom b/Modules/Parser/Tests/FeedParserTests/Resources/OneFootTsunami.atom similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/OneFootTsunami.atom rename to Modules/Parser/Tests/FeedParserTests/Resources/OneFootTsunami.atom diff --git a/Modules/Parser/Tests/ParserTests/Resources/ScriptingNews.json b/Modules/Parser/Tests/FeedParserTests/Resources/ScriptingNews.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/ScriptingNews.json rename to Modules/Parser/Tests/FeedParserTests/Resources/ScriptingNews.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/YouTubeTheVolvoRocks.html b/Modules/Parser/Tests/FeedParserTests/Resources/YouTubeTheVolvoRocks.html similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/YouTubeTheVolvoRocks.html rename to Modules/Parser/Tests/FeedParserTests/Resources/YouTubeTheVolvoRocks.html diff --git a/Modules/Parser/Tests/ParserTests/Resources/aktuality.rss b/Modules/Parser/Tests/FeedParserTests/Resources/aktuality.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/aktuality.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/aktuality.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/allthis-partial.json b/Modules/Parser/Tests/FeedParserTests/Resources/allthis-partial.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/allthis-partial.json rename to Modules/Parser/Tests/FeedParserTests/Resources/allthis-partial.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/allthis.atom b/Modules/Parser/Tests/FeedParserTests/Resources/allthis.atom similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/allthis.atom rename to 
Modules/Parser/Tests/FeedParserTests/Resources/allthis.atom diff --git a/Modules/Parser/Tests/ParserTests/Resources/allthis.json b/Modules/Parser/Tests/FeedParserTests/Resources/allthis.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/allthis.json rename to Modules/Parser/Tests/FeedParserTests/Resources/allthis.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/atp.rss b/Modules/Parser/Tests/FeedParserTests/Resources/atp.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/atp.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/atp.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/authors.json b/Modules/Parser/Tests/FeedParserTests/Resources/authors.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/authors.json rename to Modules/Parser/Tests/FeedParserTests/Resources/authors.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/bio.rdf b/Modules/Parser/Tests/FeedParserTests/Resources/bio.rdf similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/bio.rdf rename to Modules/Parser/Tests/FeedParserTests/Resources/bio.rdf diff --git a/Modules/Parser/Tests/ParserTests/Resources/cloudblog.rss b/Modules/Parser/Tests/FeedParserTests/Resources/cloudblog.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/cloudblog.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/cloudblog.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/coco.html b/Modules/Parser/Tests/FeedParserTests/Resources/coco.html similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/coco.html rename to Modules/Parser/Tests/FeedParserTests/Resources/coco.html diff --git a/Modules/Parser/Tests/ParserTests/Resources/curt.json b/Modules/Parser/Tests/FeedParserTests/Resources/curt.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/curt.json rename to 
Modules/Parser/Tests/FeedParserTests/Resources/curt.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/dcrainmaker.xml b/Modules/Parser/Tests/FeedParserTests/Resources/dcrainmaker.xml similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/dcrainmaker.xml rename to Modules/Parser/Tests/FeedParserTests/Resources/dcrainmaker.xml diff --git a/Modules/Parser/Tests/ParserTests/Resources/donthitsave.xml b/Modules/Parser/Tests/FeedParserTests/Resources/donthitsave.xml similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/donthitsave.xml rename to Modules/Parser/Tests/FeedParserTests/Resources/donthitsave.xml diff --git a/Modules/Parser/Tests/ParserTests/Resources/expertopinionent.atom b/Modules/Parser/Tests/FeedParserTests/Resources/expertopinionent.atom similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/expertopinionent.atom rename to Modules/Parser/Tests/FeedParserTests/Resources/expertopinionent.atom diff --git a/Modules/Parser/Tests/ParserTests/Resources/furbo.html b/Modules/Parser/Tests/FeedParserTests/Resources/furbo.html similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/furbo.html rename to Modules/Parser/Tests/FeedParserTests/Resources/furbo.html diff --git a/Modules/Parser/Tests/ParserTests/Resources/inessential.html b/Modules/Parser/Tests/FeedParserTests/Resources/inessential.html similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/inessential.html rename to Modules/Parser/Tests/FeedParserTests/Resources/inessential.html diff --git a/Modules/Parser/Tests/ParserTests/Resources/inessential.json b/Modules/Parser/Tests/FeedParserTests/Resources/inessential.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/inessential.json rename to Modules/Parser/Tests/FeedParserTests/Resources/inessential.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/kc0011.rss 
b/Modules/Parser/Tests/FeedParserTests/Resources/kc0011.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/kc0011.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/kc0011.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/livemint.xml b/Modules/Parser/Tests/FeedParserTests/Resources/livemint.xml similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/livemint.xml rename to Modules/Parser/Tests/FeedParserTests/Resources/livemint.xml diff --git a/Modules/Parser/Tests/ParserTests/Resources/macworld.rss b/Modules/Parser/Tests/FeedParserTests/Resources/macworld.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/macworld.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/macworld.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/manton.rss b/Modules/Parser/Tests/FeedParserTests/Resources/manton.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/manton.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/manton.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/monkeydom.rss b/Modules/Parser/Tests/FeedParserTests/Resources/monkeydom.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/monkeydom.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/monkeydom.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/natasha.xml b/Modules/Parser/Tests/FeedParserTests/Resources/natasha.xml similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/natasha.xml rename to Modules/Parser/Tests/FeedParserTests/Resources/natasha.xml diff --git a/Modules/Parser/Tests/ParserTests/Resources/phpxml.rss b/Modules/Parser/Tests/FeedParserTests/Resources/phpxml.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/phpxml.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/phpxml.rss diff --git 
a/Modules/Parser/Tests/ParserTests/Resources/pxlnv.json b/Modules/Parser/Tests/FeedParserTests/Resources/pxlnv.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/pxlnv.json rename to Modules/Parser/Tests/FeedParserTests/Resources/pxlnv.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/rose.json b/Modules/Parser/Tests/FeedParserTests/Resources/rose.json similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/rose.json rename to Modules/Parser/Tests/FeedParserTests/Resources/rose.json diff --git a/Modules/Parser/Tests/ParserTests/Resources/russcox.atom b/Modules/Parser/Tests/FeedParserTests/Resources/russcox.atom similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/russcox.atom rename to Modules/Parser/Tests/FeedParserTests/Resources/russcox.atom diff --git a/Modules/Parser/Tests/ParserTests/Resources/scriptingNews.rss b/Modules/Parser/Tests/FeedParserTests/Resources/scriptingNews.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/scriptingNews.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/scriptingNews.rss diff --git a/Modules/Parser/Tests/ParserTests/Resources/sixcolors.html b/Modules/Parser/Tests/FeedParserTests/Resources/sixcolors.html similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/sixcolors.html rename to Modules/Parser/Tests/FeedParserTests/Resources/sixcolors.html diff --git a/Modules/Parser/Tests/ParserTests/Resources/theomnishow.rss b/Modules/Parser/Tests/FeedParserTests/Resources/theomnishow.rss similarity index 100% rename from Modules/Parser/Tests/ParserTests/Resources/theomnishow.rss rename to Modules/Parser/Tests/FeedParserTests/Resources/theomnishow.rss From f86a5f831413c414224179d3df64747a38cf50de Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 2 Sep 2024 12:03:24 -0700 Subject: [PATCH 27/88] Continue work on Swift port of Parser. 
--- .../xcschemes/Parser-Package.xcscheme | 24 + Modules/Parser/Package.swift | 21 +- .../Sources/DateParser/DateParser.swift | 516 ++++++++++++++++++ .../Sources/DateParser/DateParserx.swift | 354 ++++++++++++ .../Sources/FeedParser/Feeds/FeedParser.swift | 1 + .../Sources/FeedParser/Feeds/FeedType.swift | 1 + .../Feeds/JSON/JSONFeedParser.swift | 1 + .../Feeds/JSON/RSSInJSONParser.swift | 1 + .../Sources/FeedParser/Feeds/ParsedItem.swift | 2 +- .../FeedParser/Feeds/XML/AtomParser.swift | 1 + .../FeedParser/Feeds/XML/RSSArticle.swift | 111 ++++ .../FeedParser/Feeds/XML/RSSAuthor.swift | 35 ++ .../FeedParser/Feeds/XML/RSSEnclosure.swift | 20 + .../FeedParser/Feeds/XML/RSSFeed.swift | 22 + .../FeedParser/Feeds/XML/RSSParser.swift | 199 ++++++- .../Sources/OPMLParser/OPMLParser.swift | 19 +- Modules/Parser/Sources/SAX/SAXParser.swift | 4 +- .../DateParserTests/DateParserTests.swift | 116 ++++ .../FeedParserTests/RSDateParserTests.swift | 109 ---- 19 files changed, 1430 insertions(+), 127 deletions(-) create mode 100644 Modules/Parser/Sources/DateParser/DateParser.swift create mode 100644 Modules/Parser/Sources/DateParser/DateParserx.swift create mode 100644 Modules/Parser/Sources/FeedParser/Feeds/XML/RSSArticle.swift create mode 100644 Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift create mode 100644 Modules/Parser/Sources/FeedParser/Feeds/XML/RSSEnclosure.swift create mode 100644 Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeed.swift create mode 100644 Modules/Parser/Tests/DateParserTests/DateParserTests.swift delete mode 100644 Modules/Parser/Tests/FeedParserTests/RSDateParserTests.swift diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme index e1ee0e250..5ddd095ac 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme 
@@ -49,6 +49,20 @@ ReferencedContainer = "container:"> + + + + + + + + + + // MARK: - Public API + + /// Parse W3C and pubDate dates — used for feed parsing. + /// This is a fast alternative to system APIs + /// for parsing dates. + public static func date(data: Data) -> Date? { + + let numberOfBytes = data.count + + // Make sure it’s in reasonable range for a date string. + if numberOfBytes < 6 || numberOfBytes > 150 { + return nil + } + + return data.withUnsafeBytes { bytes in + let buffer = bytes.bindMemory(to: UInt8.self) + + if dateIsW3CDate(buffer, numberOfBytes) { + return parseW3CDate(buffer, numberOfBytes) + } + else if dateIsPubDate(buffer, numberOfBytes) { + return parsePubDate(buffer, numberOfBytes) + } + + // Fallback, in case our detection fails. + return parseW3CDate(buffer, numberOfBytes) + } + } +} + +// MARK: - Private + +private extension DateParser { + + struct DateCharacter { + + static let space = Character(" ").asciiValue + static let `return` = Character("\r").asciiValue + static let newline = Character("\n").asciiValue + static let tab = Character("\t").asciiValue + static let hyphen = Character("-").asciiValue + static let comma = Character(",").asciiValue + static let dot = Character(".").asciiValue + static let colon = Character(":").asciiValue + static let plus = Character("+").asciiValue + static let minus = Character("-").asciiValue + static let Z = Character("Z").asciiValue + static let z = Character("z").asciiValue + static let F = Character("F").asciiValue + static let f = Character("f").asciiValue + static let S = Character("S").asciiValue + static let s = Character("s").asciiValue + static let O = Character("O").asciiValue + static let o = Character("o").asciiValue + static let N = Character("N").asciiValue + static let n = Character("n").asciiValue + static let D = Character("D").asciiValue + static let d = Character("d").asciiValue + } + + enum Month: Int { + + January = 1, + February, + March, + April, + May, + June, + July, + 
August, + September, + October, + November, + December + } + + // MARK: - Standard Formats + + static func dateIsW3CDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Bool { + + // Something like 2010-11-17T08:40:07-05:00 + // But might be missing T character in the middle. + // Looks for four digits in a row followed by a -. + + for i in 0.. Bool { + + for ch in bytes { + if ch == DateCharacter.space || ch == DateCharacter.comma { + return true + } + } + + return false + } + + static func parseW3CDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Date { + + /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss" + @"yyyy-MM-dd'T'HH:mm:sszzz" + @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz" + etc.*/ + + var finalIndex = 0 + + let year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex) + let month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) + let day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) + let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) + let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) + let second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) + + let currentIndex = finalIndex + 1 + + let milliseconds = { + var ms = 0 + let hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.dot) + if hasMilliseconds { + ms = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex) + currentIndex = finalIndex + 1 + } + return ms + }() + + let timeZoneOffset = parsedtimeZoneOffset(bytes, numberOfBytes, currentIndex) + + return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset) + } + + static func parsePubDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Date { + + var finalIndex = 0 + + let day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex) ?? 
1 + let month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex) + let year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex) + let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + + var currentIndex = finalIndex + 1 + + let second = { + var s = 0 + let hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.colon) + if hasSeconds { + s = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex) + } + return s + }() + + currentIndex = finalIndex + 1 + + let timeZoneOffset = { + var offset = 0 + let hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.space) + if hasTimeZone { + offset = parsedtimeZoneOffset(bytes, numberOfBytes, currentIndex) + } + return offset + }() + + return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset) + } + + // MARK: - Date Creation + + static func dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int, _ timeZoneOffset: Int) { + + var timeInfo = tm() + timeInfo.tm_sec = CInt(second) + timeInfo.tm_min = CInt(minute) + timeInfo.tm_hour = CInt(hour) + timeInfo.tm_mday = CInt(day) + timeInfo.tm_mon = CInt(month - 1) //It's 1-based coming in + timeInfo.tm_year = CInt(year - 1900) //see time.h -- it's years since 1900 + timeInfo.tm_wday = -1 + timeInfo.tm_yday = -1 + timeInfo.tm_isdst = -1 + timeInfo.tm_gmtoff = timeZoneOffset; + timeInfo.tm_zone = nil; + + var rawTime = timegm(&timeInfo) + if rawTime == time_t(UInt.max) { + + // NSCalendar is super-amazingly slow (which is partly why this parser exists), + // so this is used only when the date is far enough in the future + // (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. 
+ // Hopefully by the time we consistently need dates that far in the future + // the performance of NSCalendar won’t be an issue. + + var dateComponents = DateComponents() + + dateComponents.timeZone = TimeZone(forSecondsFromGMT: timeZoneOffset) + dateComponents.year = year + dateComponents.month = month + dateComponents.day = day + dateComponents.hour = hour + dateComponents.minute = minute + dateComponents.second = second + (milliseconds / 1000) + + return Calendar.autoupdatingCurrent.date(from: dateComponents) + } + + if milliseconds > 0 { + rawTime += Float(milliseconds) / 1000.0 + } + + return Date(timeIntervalSince1970: rawTime) + } + + // MARK: - Time Zones and Offsets + + static let kGMT = "GMT".utf8CString + static let kUTC = "UTC".utf8CString + + static func parsedTimeZoneOffset(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int) -> Int { + + var timeZoneCharacters: [CChar] = [0, 0, 0, 0, 0, 0] // nil-terminated last character + var numberOfCharactersFound = 0 + var hasAtLeastOneAlphaCharacter = false + + for i in startingIndex..= 5 { + break + } + } + + if numberOfCharactersFound < 1 || timeZoneCharacters[0] == DateCharacter.Z || timeZoneCharacters[0] == DateCharacter.z { + return 0 + } + if strcasestr(timeZoneCharacters, kGMT) != nil || strcasestr(timeZoneCharacters, kUTC) != nil { + return 0 + } + + if hasAtLeastOneAlphaCharacter { + return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters) + } + return offsetInSecondsForOffsetCharacters(timeZoneCharacters) + } + + static func offsetInSecondsForOffsetCharacters(_ timeZoneCharacters: DateBuffer) { + + let isPlus = timeZoneCharacters[0] == DateCharacter.plus + + var finalIndex = 0 + let numberOfCharacters = strlen(timeZoneCharacters) + let hours = nextNumericValue(timeZoneCharacters, numberOfCharacters, 0, 2, &finalIndex) ?? 0 + let minutes = nextNumericValue(timeZoneCharacters, numberOfCharacters, finalIndex + 1, 2, &finalIndex) ?? 
0 + + if hours == 0 && minutes == 0 { + return 0 + } + + var seconds = (hours * 60 * 60) + (minutes * 60) + if !isPlus { + seconds = 0 - seconds + } + + return seconds + } + + /// Returns offset in seconds. + static func timeZoneOffset(_ hours: Int, _ minutes: Int) -> Int { + + if hours < 0 { + return (hours * 60 * 60) - (minutes * 60) + } + return (hours * 60 * 60) + (minutes * 60) + } + + // See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list + private let timeZoneTable: [String: Int] = [ + + "GMT": timeZoneOffset(0, 0), + "PDT": timeZoneOffset(-7, 0), + "PST": timeZoneOffset(-8, 0), + "EST": timeZoneOffset(-5, 0), + "EDT": timeZoneOffset(-4, 0), + "MDT": timeZoneOffset(-6, 0), + "MST": timeZoneOffset(-7, 0), + "CST": timeZoneOffset(-6, 0), + "CDT": timeZoneOffset(-5, 0), + "ACT": timeZoneOffset(-8, 0), + "AFT": timeZoneOffset(4, 30), + "AMT": timeZoneOffset(4, 0), + "ART": timeZoneOffset(-3, 0), + "AST": timeZoneOffset(3, 0), + "AZT": timeZoneOffset(4, 0), + "BIT": timeZoneOffset(-12, 0), + "BDT": timeZoneOffset(8, 0), + "ACST": timeZoneOffset(9, 30), + "AEST": timeZoneOffset(10, 0), + "AKST": timeZoneOffset(-9, 0), + "AMST": timeZoneOffset(5, 0), + "AWST": timeZoneOffset(8, 0), + "AZOST": timeZoneOffset(-1, 0), + "BIOT": timeZoneOffset(6, 0), + "BRT": timeZoneOffset(-3, 0), + "BST": timeZoneOffset(6, 0), + "BTT": timeZoneOffset(6, 0), + "CAT": timeZoneOffset(2, 0), + "CCT": timeZoneOffset(6, 30), + "CET": timeZoneOffset(1, 0), + "CEST": timeZoneOffset(2, 0), + "CHAST": timeZoneOffset(12, 45), + "ChST": timeZoneOffset(10, 0), + "CIST": timeZoneOffset(-8, 0), + "CKT": timeZoneOffset(-10, 0), + "CLT": timeZoneOffset(-4, 0), + "CLST": timeZoneOffset(-3, 0), + "COT": timeZoneOffset(-5, 0), + "COST": timeZoneOffset(-4, 0), + "CVT": timeZoneOffset(-1, 0), + "CXT": timeZoneOffset(7, 0), + "EAST": timeZoneOffset(-6, 0), + "EAT": timeZoneOffset(3, 0), + "ECT": timeZoneOffset(-4, 0), + "EEST": timeZoneOffset(3, 0), + "EET": timeZoneOffset(2, 0), + 
"FJT": timeZoneOffset(12, 0), + "FKST": timeZoneOffset(-4, 0), + "GALT": timeZoneOffset(-6, 0), + "GET": timeZoneOffset(4, 0), + "GFT": timeZoneOffset(-3, 0), + "GILT": timeZoneOffset(7, 0), + "GIT": timeZoneOffset(-9, 0), + "GST": timeZoneOffset(-2, 0), + "GYT": timeZoneOffset(-4, 0), + "HAST": timeZoneOffset(-10, 0), + "HKT": timeZoneOffset(8, 0), + "HMT": timeZoneOffset(5, 0), + "IRKT": timeZoneOffset(8, 0), + "IRST": timeZoneOffset(3, 30), + "IST": timeZoneOffset(2, 0), + "JST": timeZoneOffset(9, 0), + "KRAT": timeZoneOffset(7, 0), + "KST": timeZoneOffset(9, 0), + "LHST": timeZoneOffset(10, 30), + "LINT": timeZoneOffset(14, 0), + "MAGT": timeZoneOffset(11, 0), + "MIT": timeZoneOffset(-9, 30), + "MSK": timeZoneOffset(3, 0), + "MUT": timeZoneOffset(4, 0), + "NDT": timeZoneOffset(-2, 30), + "NFT": timeZoneOffset(11, 30), + "NPT": timeZoneOffset(5, 45), + "NT": timeZoneOffset(-3, 30), + "OMST": timeZoneOffset(6, 0), + "PETT": timeZoneOffset(12, 0), + "PHOT": timeZoneOffset(13, 0), + "PKT": timeZoneOffset(5, 0), + "RET": timeZoneOffset(4, 0), + "SAMT": timeZoneOffset(4, 0), + "SAST": timeZoneOffset(2, 0), + "SBT": timeZoneOffset(11, 0), + "SCT": timeZoneOffset(4, 0), + "SLT": timeZoneOffset(5, 30), + "SST": timeZoneOffset(8, 0), + "TAHT": timeZoneOffset(-10, 0), + "THA": timeZoneOffset(7, 0), + "UYT": timeZoneOffset(-3, 0), + "UYST": timeZoneOffset(-2, 0), + "VET": timeZoneOffset(-4, 30), + "VLAT": timeZoneOffset(10, 0), + "WAT": timeZoneOffset(1, 0), + "WET": timeZoneOffset(0, 0), + "WEST": timeZoneOffset(1, 0), + "YAKT": timeZoneOffset(9, 0), + "YEKT": timeZoneOffset(5, 0) + ] + + static func offsetInSecondsForTimeZoneAbbreviation(_ abbreviation: DateBuffer) -> Int? { + + let name = String(cString: abbreviation) + return timeZoneTable[name] + } + + // MARK: - Parser + + static func nextMonthValue(_ buffer: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ finalIndex: inout Int) -> DateParser.Month? { + + // Lots of short-circuits here. Not strict. 
+ + var numberOfAlphaCharactersFound = 0 + var monthCharacters: [CChar] = [0, 0, 0] + + for i in startingIndex.. 0 { + break + } + } + + numberOfAlphaCharactersFound +=1 + if numberOfAlphaCharactersFound == 1 { + if ch == DateCharacter.F || ch == DateCharacter.f { + return February + } + if ch == DateCharacter.S || ch == DateCharacter.s { + return September + } + if ch == DateCharacter.O || ch == DateCharacter.o { + return October + } + if ch == DateCharacter.N || ch == DateCharacter.n { + return November + } + if ch == DateCharacter.D || ch == DateCharacter.d { + return December + } + } + + monthCharacters[numberOfAlphaCharactersFound - 1] = character + if numberOfAlphaCharactersFound >=3 + break + } + + if numberOfAlphaCharactersFound < 2 { + return nil + } + + if monthCharacters[0] == DateCharater.J || monthCharacters[0] == DateCharacter.j { // Jan, Jun, Jul + if monthCharacters[1] == DateCharacter.A || monthCharacters[1] == DateCharacter.a { + return Month.January + } + if monthCharacters[1] = DateCharacter.U || monthCharacters[1] == DateCharacter.u { + if monthCharacters[2] == DateCharacter.N || monthCharacters[2] == DateCharacter.n { + return June + } + return July + } + return January + } + + if monthCharacters[0] == DateCharacter.M || monthCharacters[0] == DateCharacter.m { // March, May + if monthCharacters[2] == DateCharacter.Y || monthCharacters[2] == DateCharacter.y { + return May + } + return March + } + + if monthCharacters[0] == DateCharacter.A || monthCharacters[0] == DateCharacter.a { // April, August + if monthCharacters[1] == DateCharacter.U || monthCharacters[1] == DateCharacter.u { + return August + } + return April + } + + return January // Should never get here (but possibly do) + } + + static func nextNumericValue(_ bytes: DateBuffer, numberOfBytes: Int, startingIndex: Int, maximumNumberOfDigits: Int, finalIndex: inout Int) -> Int? 
{ + + // Maximum for the maximum is 4 (for time zone offsets and years) + assert(maximumNumberOfDigits > 0 && maximumNumberOfDigits <= 4) + + var numberOfDigitsFound = 0 + var digits = [0, 0, 0, 0] + + for i in startingIndex.. Int? { +// +// // Months are 1-based -- January is 1, Dec is 12. +// // Lots of short-circuits here. Not strict. GIGO +// +// var i = startingIndex +// var numberOfBytes = bytes.count +// var numberOfAlphaCharactersFound = 0 +// var monthCharacters = [Character]() +// +// while index < bytes.count { +// +// +// } +// +// +// var index = startingIndex +// var numberOfAlphaCharactersFound = 0 +// var monthCharacters: [Character] = [] +// +// while index < bytes.count { +// let character = bytes[bytes.index(bytes.startIndex, offsetBy: index)] +// +// if !character.isLetter, numberOfAlphaCharactersFound < 1 { +// index += 1 +// continue +// } +// if !character.isLetter, numberOfAlphaCharactersFound > 0 { +// break +// } +// +// numberOfAlphaCharactersFound += 1 +// if numberOfAlphaCharactersFound == 1 { +// switch character.lowercased() { +// case "f": return (.February.rawValue, index) +// case "s": return (.September.rawValue, index) +// case "o": return (.October.rawValue, index) +// case "n": return (.November.rawValue, index) +// case "d": return (.December.rawValue, index) +// default: break +// } +// } +// +// monthCharacters.append(character) +// if numberOfAlphaCharactersFound >= 3 { +// break +// } +// index += 1 +// } +// +// if numberOfAlphaCharactersFound < 2 { +// return (nil, index) +// } +// +// if monthCharacters[0].lowercased() == "j" { +// if monthCharacters[1].lowercased() == "a" { +// return (.January.rawValue, index) +// } +// if monthCharacters[1].lowercased() == "u" { +// if monthCharacters.count > 2 && monthCharacters[2].lowercased() == "n" { +// return (.June.rawValue, index) +// } +// return (.July.rawValue, index) +// } +// return (.January.rawValue, index) +// } +// +// if monthCharacters[0].lowercased() == "m" { +// 
if monthCharacters.count > 2 && monthCharacters[2].lowercased() == "y" { +// return (.May.rawValue, index) +// } +// return (.March.rawValue, index) +// } +// +// if monthCharacters[0].lowercased() == "a" { +// if monthCharacters[1].lowercased() == "u" { +// return (.August.rawValue, index) +// } +// return (.April.rawValue, index) +// } +// +// return (.January.rawValue, index) +//} +// +//func nextNumericValue(bytes: String, startingIndex: Int, maximumNumberOfDigits: Int) -> (Int?, Int) { +// let digits = bytes.dropFirst(startingIndex).prefix(maximumNumberOfDigits) +// guard let value = Int(digits) else { +// return (nil, startingIndex) +// } +// return (value, startingIndex + digits.count) +//} +// +//func hasAtLeastOneAlphaCharacter(_ s: String) -> Bool { +// return s.contains { $0.isLetter } +//} +// +//func offsetInSeconds(forTimeZoneAbbreviation abbreviation: String) -> Int { +// for zone in timeZoneTable { +// if zone.abbreviation.caseInsensitiveCompare(abbreviation) == .orderedSame { +// if zone.offsetHours < 0 { +// return (zone.offsetHours * 3600) - (zone.offsetMinutes * 60) +// } +// return (zone.offsetHours * 3600) + (zone.offsetMinutes * 60) +// } +// } +// return 0 +//} +// +//func offsetInSeconds(forOffsetCharacters timeZoneCharacters: String) -> Int { +// let isPlus = timeZoneCharacters.hasPrefix("+") +// let numericValue = timeZoneCharacters.filter { $0.isNumber || $0 == "-" } +// let (hours, finalIndex) = nextNumericValue(bytes: numericValue, startingIndex: 0, maximumNumberOfDigits: 2) +// let (minutes, _) = nextNumericValue(bytes: numericValue, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2) +// +// let seconds = ((hours ?? 0) * 3600) + ((minutes ?? 0) * 60) +// return isPlus ? 
seconds : -seconds +//} +// +//func parsedTimeZoneOffset(bytes: String, startingIndex: Int) -> Int { +// var timeZoneCharacters: String = "" +// var numberOfCharactersFound = 0 +// var i = startingIndex +// +// while i < bytes.count, numberOfCharactersFound < 5 { +// let character = bytes[bytes.index(bytes.startIndex, offsetBy: i)] +// if character != ":" && character != " " { +// timeZoneCharacters.append(character) +// numberOfCharactersFound += 1 +// } +// i += 1 +// } +// +// if numberOfCharactersFound < 1 || timeZoneCharacters.lowercased() == "z" { +// return 0 +// } +// +// if timeZoneCharacters.range(of: "GMT", options: .caseInsensitive) != nil || +// timeZoneCharacters.range(of: "UTC", options: .caseInsensitive) != nil { +// return 0 +// } +// +// if hasAtLeastOneAlphaCharacter(timeZoneCharacters) { +// return offsetInSeconds(forTimeZoneAbbreviation: timeZoneCharacters) +// } +// return offsetInSeconds(forOffsetCharacters: timeZoneCharacters) +//} +// +//func dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset( +// year: Int, month: Int, day: Int, +// hour: Int, minute: Int, second: Int, +// milliseconds: Int, timeZoneOffset: Int) -> Date? { +// +// var dateComponents = DateComponents() +// dateComponents.year = year +// dateComponents.month = month +// dateComponents.day = day +// dateComponents.hour = hour +// dateComponents.minute = minute +// dateComponents.second = second +// dateComponents.timeZone = TimeZone(secondsFromGMT: timeZoneOffset) +// +// let calendar = Calendar.current +// return calendar.date(from: dateComponents) +//} +// +//func parsePubDate(bytes: String) -> Date? 
{ +// let (day, finalIndex) = nextNumericValue(bytes: bytes, startingIndex: 0, maximumNumberOfDigits: 2) +// let (month, finalIndex2) = nextMonthValue(bytes: bytes, startingIndex: finalIndex + 1) +// let (year, finalIndex3) = nextNumericValue(bytes: bytes, startingIndex: finalIndex2 + 1, maximumNumberOfDigits: 4) +// let (hour, finalIndex4) = nextNumericValue(bytes: bytes, startingIndex: finalIndex3 + 1, maximumNumberOfDigits: 2) +// let (minute, finalIndex5) = nextNumericValue(bytes: bytes, startingIndex: finalIndex4 + 1, maximumNumberOfDigits: 2) +// +// var second = 0 +// let currentIndex = finalIndex5 + 1 +// if currentIndex < bytes.count, bytes[bytes.index(bytes.startIndex, offsetBy: currentIndex)] == ":" { +// second = nextNumericValue(bytes: bytes, startingIndex: currentIndex, maximumNumberOfDigits: 2).0 ?? 0 +// } +// +// let timeZoneOffset = parsedTimeZoneOffset(bytes: bytes, startingIndex: currentIndex + 1) +// +// return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset( +// year: year ?? 1970, +// month: month ?? RSMonth.January.rawValue, +// day: day ?? 1, +// hour: hour ?? 0, +// minute: minute ?? 0, +// second: second, +// milliseconds: 0, +// timeZoneOffset: timeZoneOffset +// ) +//} +// +//func parseW3C(bytes: String) -> Date? 
{ +// let (year, finalIndex) = nextNumericValue(bytes: bytes, startingIndex: 0, maximumNumberOfDigits: 4) +// let (month, finalIndex2) = nextNumericValue(bytes: bytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2) +// let (day, finalIndex3) = nextNumericValue(bytes: bytes, startingIndex: finalIndex2 + 1, maximumNumberOfDigits: 2) +// let (hour, finalIndex4) = nextNumericValue(bytes: bytes, startingIndex: finalIndex3 + 1, maximumNumberOfDigits: 2) +// let (minute, finalIndex5) = nextNumericValue(bytes: bytes, startingIndex: finalIndex4 + 1, maximumNumberOfDigits: 2) +// let (second, finalIndex6) = nextNumericValue(bytes: bytes, startingIndex: finalIndex5 + 1, maximumNumberOfDigits: 2) +// +// var milliseconds = 0 +// let currentIndex = finalIndex6 + 1 +// if currentIndex < bytes.count, bytes[bytes.index(bytes.startIndex, offsetBy: currentIndex)] == "." { +// milliseconds = nextNumericValue(bytes: bytes, startingIndex: currentIndex + 1, maximumNumberOfDigits: 3).0 ?? 0 +// } +// +// let timeZoneOffset = parsedTimeZoneOffset(bytes: bytes, startingIndex: currentIndex + 1) +// +// return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset( +// year: year ?? 1970, +// month: month ?? RSMonth.January.rawValue, +// day: day ?? 1, +// hour: hour ?? 0, +// minute: minute ?? 0, +// second: second ?? 0, +// milliseconds: milliseconds, +// timeZoneOffset: timeZoneOffset +// ) +//} +// +//func dateWithBytes(bytes: String) -> Date? 
{ +// guard !bytes.isEmpty else { return nil } +// +// if bytes.range(of: "-") != nil { +// return parseW3C(bytes: bytes) +// } +// return parsePubDate(bytes: bytes) +//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index a9417e31f..d1e78b464 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -7,6 +7,7 @@ // import Foundation +import SAX // FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. // You don’t need to know the type of feed. diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift index 4dcaaa02c..8cf33225a 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift @@ -7,6 +7,7 @@ // import Foundation +import SAX public enum FeedType: Sendable { case rss diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift index 0e765961d..247c18612 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift @@ -7,6 +7,7 @@ // import Foundation +import SAX // See https://jsonfeed.org/version/1.1 diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift index 74e6b0658..e27c0e629 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift @@ -7,6 +7,7 @@ // import Foundation +import SAX // See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md // Also: http://cyber.harvard.edu/rss/rss.html diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift 
b/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift index 40c719be1..c9fc2eeb8 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedItem: Hashable, Sendable { +public final class ParsedItem: Hashable, Sendable { public let syncServiceID: String? //Nil when not syncing public let uniqueID: String //RSS guid, for instance; may be calculated diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 151349af7..43fee7810 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -7,6 +7,7 @@ // import Foundation +import SAX // RSSParser wraps the Objective-C RSAtomParser. // diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSArticle.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSArticle.swift new file mode 100644 index 000000000..0bfe62cbb --- /dev/null +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSArticle.swift @@ -0,0 +1,111 @@ +// +// RSSArticle.swift +// +// +// Created by Brent Simmons on 8/27/24. +// + +import Foundation +import FoundationExtras + +final class RSSArticle { + + var feedURL: String + + /// An RSS guid, if present, or calculated from other attributes. + /// Should be unique to the feed, but not necessarily unique + /// across different feeds. (Not suitable for a database ID.) + lazy var articleID: String = { + if let guid { + return guid + } + return calculatedArticleID() + }() + + var guid: String? + var title: String? + var body: String? + var link: String? + var permalink: String? + var authors: [RSSAuthor]? + var enclosures: [RSSEnclosure]? + var datePublished: Date? + var dateModified: Date? + var dateParsed: Date + var language: String? 
+ + init(_ feedURL: String) { + self.feedURL = feedURL + self.dateParsed = Date() + } + + func addEnclosure(_ enclosure: RSSEnclosure) { + + if enclosures == nil { + enclosures = [RSSEnclosure]() + } + enclosures!.append(enclosure) + } + + func addAuthor(_ author: RSSAuthor) { + + if authors == nil { + authors = [RSSAuthor]() + } + authors!.append(author) + } +} + +private extension RSSArticle { + + func calculatedArticleID() -> String { + + // Concatenate a combination of properties when no guid. Then hash the result. + // In general, feeds should have guids. When they don't, re-runs are very likely, + // because there's no other 100% reliable way to determine identity. + // This is intended to create an ID unique inside a feed, but not globally unique. + // Not suitable for a database ID, in other words. + + var s = "" + + let datePublishedTimeStampString: String? = { + guard let datePublished else { + return nil + } + return String(format: "%.0f", datePublished.timeIntervalSince1970) + }() + + // Ideally we have a permalink and a pubDate. + // Either one would probably be a good guid, but together they should be rock-solid. + // (In theory. Feeds are buggy, though.) 
+ if let permalink, !permalink.isEmpty, let datePublishedTimeStampString { + s.append(permalink) + s.append(datePublishedTimeStampString) + } + else if let link, !link.isEmpty, let datePublishedTimeStampString { + s.append(link) + s.append(datePublishedTimeStampString) + } + else if let title, !title.isEmpty, let datePublishedTimeStampString { + s.append(title) + s.append(datePublishedTimeStampString) + } + else if let datePublishedTimeStampString { + s.append(datePublishedTimeStampString) + } + else if let permalink, !permalink.isEmpty { + s.append(permalink) + } + else if let link, !link.isEmpty { + s.append(link) + } + else if let title, !title.isEmpty { + s.append(title) + } + else if let body, !body.isEmpty { + s.append(body) + } + + return s.md5String + } +} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift new file mode 100644 index 000000000..a153ecb1e --- /dev/null +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift @@ -0,0 +1,35 @@ +// +// RSSAuthor.swift +// +// +// Created by Brent Simmons on 8/27/24. +// + +import Foundation + +final class RSSAuthor { + + var name: String? + var url: String? + var avatarURL: String? + var emailAddress: String? + + init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) { + self.name = name + self.url = url + self.avatarURL = avatarURL + self.emailAddress = emailAddress + } + + /// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.) 
+ convenience init(singleString: String) { + + if singleString.contains("@") { + self.init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString) + } else if singleString.lowercased().hasPrefix("http") { + self.init(name: nil, url: singleString, avatarURL: nil, emailAddress: nil) + } else { + self.init(name: singleString, url: nil, avatarURL: nil, emailAddress: nil) + } + } +} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSEnclosure.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSEnclosure.swift new file mode 100644 index 000000000..a427475c8 --- /dev/null +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSEnclosure.swift @@ -0,0 +1,20 @@ +// +// RSSEnclosure.swift +// +// +// Created by Brent Simmons on 8/27/24. +// + +import Foundation + +final class RSSEnclosure { + + var url: String + var length: Int? + var mimeType: String? + var title: String? + + init(url: String) { + self.url = url + } +} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeed.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeed.swift new file mode 100644 index 000000000..34a334d3b --- /dev/null +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeed.swift @@ -0,0 +1,22 @@ +// +// RSSFeed.swift +// +// +// Created by Brent Simmons on 8/27/24. +// + +import Foundation + +final class RSSFeed { + + var urlString: String + var title: String? + var link: String? + var language: String? + + var articles: [RSSArticle]? + + init(urlString: String) { + self.urlString = urlString + } +} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift index 6a643d516..0f422c27e 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -11,12 +11,205 @@ import SAX public final class RSSParser { - private var parseFeed: ParsedFeed? 
+ private var parserData: ParserData + private var feedURL: String { + parserData.url + } + private var data: Data { + parserData.data + } + + private let feed: RSSFeed + private var articles = [RSSArticle]() + private var currentArticle: RSSArticle? { + articles.last + } - public static func parsedFeed(with parserData: ParserData) -> ParsedFeed? { + private var endRSSFound = false + private var isRDF = false + private var parsingArticle = false + private var parsingChannelImage = false + private var parsingAuthor = false + private var currentAttributes: XMLAttributesDictionary? + + public static func parsedFeed(with parserData: ParserData) -> RSSFeed { let parser = RSSParser(parserData) parser.parse() - return parser.parsedFeed + return parser.feed + } + + init(_ parserData: ParserData) { + self.parserData = parserData + self.feed = RSSFeed(urlString: parserData.url) } } + +private extension RSSParser { + + private struct XMLName { + static let uppercaseRDF = "RDF".utf8CString + static let item = "item".utf8CString + static let guid = "guid".utf8CString + static let enclosure = "enclosure".utf8CString + static let rdfAbout = "rdf:about".utf8CString + static let image = "image".utf8CString + static let author = "author".utf8CString + static let rss = "rss".utf8CString + static let link = "link".utf8CString + static let title = "title".utf8CString + static let language = "language".utf8CString + static let dc = "dc".utf8CString + static let content = "content".utf8CString + static let encoded = "encoded".utf8CString + } + + func addFeedElement(_ localName: XMLPointer, _ prefix: XMLPointer?) 
{ + + guard prefix == nil else { + return + } + + if SAXEqualTags(localName, XMLName.link) { + if feed.link == nil { + feed.link = currentString + } + } + else if SAXEqualTags(localName, XMLName.title) { + feed.title = currentString + } + else if SAXEqualTags(localName, XMLName.language) { + feed.language = currentString + } + } + + func addArticle() { + let article = RSSArticle(feedURL) + articles.append(article) + } + + func addArticleElement(_ localName: XMLPointer, _ prefix: XMLPointer?) { + + if SAXEqualTags(prefix, XMLName.dc) { + addDCElement(localName) + return; + } + + if SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) { + if let currentString, !currentString.isEmpty { + currentArticle.body = currentString + } + return + } + + guard prefix == nil else { + return + } + + if SAXEqualTags(localName, XMLName.guid) { + addGuid() + } + else if SAXEqualTags(localName, XMLName.pubDate) { + currentArticle.datePublished = currentDate + } + else if SAXEqualTags(localName, XMLName.author) { + addAuthorWithString(currentString) + } + else if SAXEqualTags(localName, XMLName.link) { + currentArticle.link = urlString(currentString) + } + else if SAXEqualTags(localName, XMLName.description) { + if currentArticle.body == nil { + currentArticle.body = currentString + } + } + else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) { + if let currentString { + currentArticle.title = currentString + } + } + else if SAXEqualTags(localName, XMLName.enclosure) { + addEnclosure() + } + } +} + +extension RSSParser: SAXParserDelegate { + + public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) 
{ + + if endRSSFound { + return + } + + if SAXEqualTags(localName, XMLName.uppercaseRDF) { + isRDF = true + return + } + + var xmlAttributes: XMLAttributesDictionary? = nil + if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(enclosure, XMLName.enclosure) { + xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) + } + if currentAttributes != xmlAttributes { + currentAttributes = xmlAttributes + } + + if prefix == nil && SAXEqualTags(localName, XMLName.item) { + addArticle() + parsingArticle = true + + if isRDF && let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid + currentArticle.guid = rdfGuid + currentArticle.permalink = rdfGuid + } + } + else if prefix == nil && SAXEqualTags(localName, XMLName.image) { + parsingChannelImage = true + } + else if prefix == nil && SAXEqualTags(localName, XMLName.author) { + if parsingArticle { + parsingAuthor = true + } + } + + if !parsingChannelImage { + saxParser.beginStoringCharacters() + } + } + + public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + + if endRSSFound { + return + } + + if isRDF && SAXEqualTags(localName, XMLName.uppercaseRDF) { + endRSSFound = true + } + else if SAXEqualTags(localName, XMLName.rss) { + endRSSFound = true + } + else if SAXEqualTags(localName, XMLName.image) { + parsingChannelImage = false + } + else if SAXEqualTags(localName, XMLName.item) { + parsingArticle = false + } + else if parsingArticle { + addArticleElement(localName, prefix) + if SAXEqualTags(localName, XMLName.author) { + parsingAuthor = false + } + } + else if !parsingChannelImage { + addFeedElement(localName, prefix) + } + } + + public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { + + // Required method. 
+ } +} + diff --git a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift index b7f44828f..ab21c4185 100644 --- a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift +++ b/Modules/Parser/Sources/OPMLParser/OPMLParser.swift @@ -22,11 +22,6 @@ public final class OPMLParser { itemStack.last } - struct XMLKey { - static let title = "title".utf8CString - static let outline = "outline".utf8CString - } - /// Returns nil if data can’t be parsed (if it’s not OPML). public static func document(with parserData: ParserData) -> OPMLDocument? { @@ -36,7 +31,6 @@ public final class OPMLParser { } init(_ parserData: ParserData) { - self.parserData = parserData } } @@ -79,14 +73,19 @@ private extension OPMLParser { extension OPMLParser: SAXParserDelegate { + private struct XMLName { + static let title = "title".utf8CString + static let outline = "outline".utf8CString + } + public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { - if SAXEqualTags(localName, XMLKey.title) { + if SAXEqualTags(localName, XMLName.title) { saxParser.beginStoringCharacters() return } - if !SAXEqualTags(localName, XMLKey.outline) { + if !SAXEqualTags(localName, XMLName.outline) { return } @@ -99,7 +98,7 @@ extension OPMLParser: SAXParserDelegate { public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { - if SAXEqualTags(localName, XMLKey.title) { + if SAXEqualTags(localName, XMLName.title) { if let item = currentItem as? 
OPMLDocument { item.title = saxParser.currentStringWithTrimmedWhitespace } @@ -107,7 +106,7 @@ extension OPMLParser: SAXParserDelegate { return } - if SAXEqualTags(localName, XMLKey.outline) { + if SAXEqualTags(localName, XMLName.outline) { popItem() } } diff --git a/Modules/Parser/Sources/SAX/SAXParser.swift b/Modules/Parser/Sources/SAX/SAXParser.swift index cc0ec9943..a9e93bd9f 100644 --- a/Modules/Parser/Sources/SAX/SAXParser.swift +++ b/Modules/Parser/Sources/SAX/SAXParser.swift @@ -91,7 +91,9 @@ public final class SAXParser { characters.count = 0 } - public func attributesDictionary(_ attributes: UnsafePointer?, attributeCount: Int) -> [String: String]? { + public typealias XMLAttributesDictionary = [String: String] + + public func attributesDictionary(_ attributes: UnsafePointer?, attributeCount: Int) -> XMLAttributesDictionary? { guard attributeCount > 0, let attributes else { return nil diff --git a/Modules/Parser/Tests/DateParserTests/DateParserTests.swift b/Modules/Parser/Tests/DateParserTests/DateParserTests.swift new file mode 100644 index 000000000..32f6b5d06 --- /dev/null +++ b/Modules/Parser/Tests/DateParserTests/DateParserTests.swift @@ -0,0 +1,116 @@ +// +// RSDateParserTests.swift +// +// +// Created by Maurice Parker on 4/1/21. +// + +import Foundation +import XCTest +@testable import DateParser + +class DateParserTests: XCTestCase { + + func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int) -> Date { + var dateComponents = DateComponents() + dateComponents.calendar = Calendar.current + dateComponents.timeZone = TimeZone(secondsFromGMT: 0) + + dateComponents.year = year + dateComponents.month = month + dateComponents.day = day + dateComponents.hour = hour + dateComponents.minute = minute + dateComponents.second = second + + return dateComponents.date! 
+ } + + func testDateWithString() { + var expectedDateResult = dateWithValues(2010, 5, 28, 21, 3, 38) + + var d = date("Fri, 28 May 2010 21:03:38 +0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 +00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 -00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 -0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("Fri, 28 May 2010 21:03:38 GMT") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38+00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38+0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38-0000") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38-00:00") + XCTAssertEqual(d, expectedDateResult) + + d = date("2010-05-28T21:03:38Z") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 7, 13, 17, 6, 40) + d = date("2010-07-13T17:06:40+00:00") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 4, 30, 12, 0, 0) + d = date("30 Apr 2010 5:00 PDT") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 5, 21, 21, 22, 53) + d = date("21 May 2010 21:22:53 GMT") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 6, 9, 5, 0, 0) + d = date("Wed, 09 Jun 2010 00:00 EST") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 6, 23, 3, 43, 50) + d = date("Wed, 23 Jun 2010 03:43:50 Z") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 6, 22, 3, 57, 49) + d = date("2010-06-22T03:57:49+00:00") + XCTAssertEqual(d, expectedDateResult) + + expectedDateResult = dateWithValues(2010, 11, 17, 13, 40, 07) + d = date("2010-11-17T08:40:07-05:00") + XCTAssertEqual(d, expectedDateResult) + } + + func 
testAtomDateWithMissingTCharacter() { + let expectedDateResult = dateWithValues(2010, 11, 17, 13, 40, 07) + let d = date("2010-11-17 08:40:07-05:00") + XCTAssertEqual(d, expectedDateResult) + } + + func testFeedbinDate() { + let expectedDateResult = dateWithValues(2019, 9, 27, 21, 01, 48) + let d = date("2019-09-27T21:01:48.000000Z") + XCTAssertEqual(d, expectedDateResult) + } + + func testHighMillisecondDate() { + let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56) + let d = date("2021-03-29T10:46:56.516941+00:00") + XCTAssertEqual(d, expectedDateResult) + } +} + +private extension DateParserTests { + + func date(_ string: String) -> Date? { + let d = Data(string.utf8) + return Date(data: d) + } +} diff --git a/Modules/Parser/Tests/FeedParserTests/RSDateParserTests.swift b/Modules/Parser/Tests/FeedParserTests/RSDateParserTests.swift deleted file mode 100644 index e9984933c..000000000 --- a/Modules/Parser/Tests/FeedParserTests/RSDateParserTests.swift +++ /dev/null @@ -1,109 +0,0 @@ -// -// RSDateParserTests.swift -// -// -// Created by Maurice Parker on 4/1/21. -// - -import Foundation -import XCTest -import Parser -import ParserObjC - -class RSDateParserTests: XCTestCase { - - static func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int) -> Date { - var dateComponents = DateComponents() - dateComponents.calendar = Calendar.current - dateComponents.timeZone = TimeZone(secondsFromGMT: 0) - - dateComponents.year = year - dateComponents.month = month - dateComponents.day = day - dateComponents.hour = hour - dateComponents.minute = minute - dateComponents.second = second - - return dateComponents.date! 
- } - - func testDateWithString() { - var expectedDateResult = Self.dateWithValues(2010, 5, 28, 21, 3, 38) - - var d = RSDateWithString("Fri, 28 May 2010 21:03:38 +0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 +00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 -00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 -0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("Fri, 28 May 2010 21:03:38 GMT") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38+00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38+0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38-0000") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38-00:00") - XCTAssertEqual(d, expectedDateResult) - - d = RSDateWithString("2010-05-28T21:03:38Z") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 7, 13, 17, 6, 40) - d = RSDateWithString("2010-07-13T17:06:40+00:00") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 4, 30, 12, 0, 0) - d = RSDateWithString("30 Apr 2010 5:00 PDT") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 5, 21, 21, 22, 53) - d = RSDateWithString("21 May 2010 21:22:53 GMT") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 6, 9, 5, 0, 0) - d = RSDateWithString("Wed, 09 Jun 2010 00:00 EST") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 6, 23, 3, 43, 50) - d = RSDateWithString("Wed, 23 Jun 2010 03:43:50 Z") - XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 6, 22, 3, 57, 49) - d = RSDateWithString("2010-06-22T03:57:49+00:00") - 
XCTAssertEqual(d, expectedDateResult) - - expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07) - d = RSDateWithString("2010-11-17T08:40:07-05:00") - XCTAssertEqual(d, expectedDateResult) - } - - func testAtomDateWithMissingTCharacter() { - let expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07) - let d = RSDateWithString("2010-11-17 08:40:07-05:00") - XCTAssertEqual(d, expectedDateResult) - } - - func testFeedbinDate() { - let expectedDateResult = Self.dateWithValues(2019, 9, 27, 21, 01, 48) - let d = RSDateWithString("2019-09-27T21:01:48.000000Z") - XCTAssertEqual(d, expectedDateResult) - } - -// func testHighMillisecondDate() { -// let expectedDateResult = Self.dateWithValues(2021, 03, 29, 10, 46, 56) -// let d = RSDateWithString("2021-03-29T10:46:56.516941+00:00") -// XCTAssertEqual(d, expectedDateResult) -// } -} From 6578f9384b56509aad9e3714444d7e13ad5c7e4f Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Thu, 5 Sep 2024 14:30:39 -0700 Subject: [PATCH 28/88] Fix build errors in DateParser. --- .../Sources/DateParser/DateParser.swift | 643 ++++++++++-------- 1 file changed, 349 insertions(+), 294 deletions(-) diff --git a/Modules/Parser/Sources/DateParser/DateParser.swift b/Modules/Parser/Sources/DateParser/DateParser.swift index 8431c28e4..f9038ac26 100644 --- a/Modules/Parser/Sources/DateParser/DateParser.swift +++ b/Modules/Parser/Sources/DateParser/DateParser.swift @@ -9,8 +9,6 @@ import Foundation public final class DateParser { - typealias DateBuffer = UnsafeBufferPointer - // MARK: - Public API /// Parse W3C and pubDate dates — used for feed parsing. 
@@ -39,279 +37,14 @@ public final class DateParser { return parseW3CDate(buffer, numberOfBytes) } } -} -// MARK: - Private - -private extension DateParser { - - struct DateCharacter { - - static let space = Character(" ").asciiValue - static let `return` = Character("\r").asciiValue - static let newline = Character("\n").asciiValue - static let tab = Character("\t").asciiValue - static let hyphen = Character("-").asciiValue - static let comma = Character(",").asciiValue - static let dot = Character(".").asciiValue - static let colon = Character(":").asciiValue - static let plus = Character("+").asciiValue - static let minus = Character("-").asciiValue - static let Z = Character("Z").asciiValue - static let z = Character("z").asciiValue - static let F = Character("F").asciiValue - static let f = Character("f").asciiValue - static let S = Character("S").asciiValue - static let s = Character("s").asciiValue - static let O = Character("O").asciiValue - static let o = Character("o").asciiValue - static let N = Character("N").asciiValue - static let n = Character("n").asciiValue - static let D = Character("D").asciiValue - static let d = Character("d").asciiValue - } - - enum Month: Int { - - January = 1, - February, - March, - April, - May, - June, - July, - August, - September, - October, - November, - December - } - - // MARK: - Standard Formats - - static func dateIsW3CDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Bool { - - // Something like 2010-11-17T08:40:07-05:00 - // But might be missing T character in the middle. - // Looks for four digits in a row followed by a -. - - for i in 0.. 
Bool { - - for ch in bytes { - if ch == DateCharacter.space || ch == DateCharacter.comma { - return true - } - } - - return false - } - - static func parseW3CDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Date { - - /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss" - @"yyyy-MM-dd'T'HH:mm:sszzz" - @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz" - etc.*/ - - var finalIndex = 0 - - let year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex) - let month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) - let day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) - let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) - let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) - let second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) - - let currentIndex = finalIndex + 1 - - let milliseconds = { - var ms = 0 - let hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.dot) - if hasMilliseconds { - ms = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex) - currentIndex = finalIndex + 1 - } - return ms - }() - - let timeZoneOffset = parsedtimeZoneOffset(bytes, numberOfBytes, currentIndex) - - return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset) - } - - static func parsePubDate(_ bytes: DateBuffer, numberOfBytes: Int) -> Date { - - var finalIndex = 0 - - let day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex) ?? 1 - let month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex) - let year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex) - let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 - let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 
0 - - var currentIndex = finalIndex + 1 - - let second = { - var s = 0 - let hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.colon) - if hasSeconds { - s = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex) - } - return s - }() - - currentIndex = finalIndex + 1 - - let timeZoneOffset = { - var offset = 0 - let hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.space) - if hasTimeZone { - offset = parsedtimeZoneOffset(bytes, numberOfBytes, currentIndex) - } - return offset - }() - - return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset) - } - - // MARK: - Date Creation - - static func dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int, _ timeZoneOffset: Int) { - - var timeInfo = tm() - timeInfo.tm_sec = CInt(second) - timeInfo.tm_min = CInt(minute) - timeInfo.tm_hour = CInt(hour) - timeInfo.tm_mday = CInt(day) - timeInfo.tm_mon = CInt(month - 1) //It's 1-based coming in - timeInfo.tm_year = CInt(year - 1900) //see time.h -- it's years since 1900 - timeInfo.tm_wday = -1 - timeInfo.tm_yday = -1 - timeInfo.tm_isdst = -1 - timeInfo.tm_gmtoff = timeZoneOffset; - timeInfo.tm_zone = nil; - - var rawTime = timegm(&timeInfo) - if rawTime == time_t(UInt.max) { - - // NSCalendar is super-amazingly slow (which is partly why this parser exists), - // so this is used only when the date is far enough in the future - // (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. - // Hopefully by the time we consistently need dates that far in the future - // the performance of NSCalendar won’t be an issue. 
- - var dateComponents = DateComponents() - - dateComponents.timeZone = TimeZone(forSecondsFromGMT: timeZoneOffset) - dateComponents.year = year - dateComponents.month = month - dateComponents.day = day - dateComponents.hour = hour - dateComponents.minute = minute - dateComponents.second = second + (milliseconds / 1000) - - return Calendar.autoupdatingCurrent.date(from: dateComponents) - } - - if milliseconds > 0 { - rawTime += Float(milliseconds) / 1000.0 - } - - return Date(timeIntervalSince1970: rawTime) - } - - // MARK: - Time Zones and Offsets - - static let kGMT = "GMT".utf8CString - static let kUTC = "UTC".utf8CString - - static func parsedTimeZoneOffset(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int) -> Int { - - var timeZoneCharacters: [CChar] = [0, 0, 0, 0, 0, 0] // nil-terminated last character - var numberOfCharactersFound = 0 - var hasAtLeastOneAlphaCharacter = false - - for i in startingIndex..= 5 { - break - } - } - - if numberOfCharactersFound < 1 || timeZoneCharacters[0] == DateCharacter.Z || timeZoneCharacters[0] == DateCharacter.z { - return 0 - } - if strcasestr(timeZoneCharacters, kGMT) != nil || strcasestr(timeZoneCharacters, kUTC) != nil { - return 0 - } - - if hasAtLeastOneAlphaCharacter { - return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters) - } - return offsetInSecondsForOffsetCharacters(timeZoneCharacters) - } - - static func offsetInSecondsForOffsetCharacters(_ timeZoneCharacters: DateBuffer) { - - let isPlus = timeZoneCharacters[0] == DateCharacter.plus - - var finalIndex = 0 - let numberOfCharacters = strlen(timeZoneCharacters) - let hours = nextNumericValue(timeZoneCharacters, numberOfCharacters, 0, 2, &finalIndex) ?? 0 - let minutes = nextNumericValue(timeZoneCharacters, numberOfCharacters, finalIndex + 1, 2, &finalIndex) ?? 
0 - - if hours == 0 && minutes == 0 { - return 0 - } - - var seconds = (hours * 60 * 60) + (minutes * 60) - if !isPlus { - seconds = 0 - seconds - } - - return seconds - } - - /// Returns offset in seconds. - static func timeZoneOffset(_ hours: Int, _ minutes: Int) -> Int { - - if hours < 0 { - return (hours * 60 * 60) - (minutes * 60) - } - return (hours * 60 * 60) + (minutes * 60) - } + private typealias DateBuffer = UnsafeBufferPointer // See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list - private let timeZoneTable: [String: Int] = [ + private static let timeZoneTable: [String: Int] = [ "GMT": timeZoneOffset(0, 0), + "UTC": timeZoneOffset(0, 0), "PDT": timeZoneOffset(-7, 0), "PST": timeZoneOffset(-8, 0), "EST": timeZoneOffset(-5, 0), @@ -408,8 +141,292 @@ private extension DateParser { "YAKT": timeZoneOffset(9, 0), "YEKT": timeZoneOffset(5, 0) ] +} - static func offsetInSecondsForTimeZoneAbbreviation(_ abbreviation: DateBuffer) -> Int? { +// MARK: - Private + +private extension DateParser { + + struct DateCharacter { + + static let space = Character(" ").asciiValue! + static let `return` = Character("\r").asciiValue! + static let newline = Character("\n").asciiValue! + static let tab = Character("\t").asciiValue! + static let hyphen = Character("-").asciiValue! + static let comma = Character(",").asciiValue! + static let dot = Character(".").asciiValue! + static let colon = Character(":").asciiValue! + static let plus = Character("+").asciiValue! + static let minus = Character("-").asciiValue! + static let A = Character("A").asciiValue! + static let a = Character("a").asciiValue! + static let D = Character("D").asciiValue! + static let d = Character("d").asciiValue! + static let F = Character("F").asciiValue! + static let f = Character("f").asciiValue! + static let J = Character("J").asciiValue! + static let j = Character("j").asciiValue! + static let M = Character("M").asciiValue! + static let m = Character("m").asciiValue! 
+ static let N = Character("N").asciiValue! + static let n = Character("n").asciiValue! + static let O = Character("O").asciiValue! + static let o = Character("o").asciiValue! + static let S = Character("S").asciiValue! + static let s = Character("s").asciiValue! + static let U = Character("U").asciiValue! + static let u = Character("u").asciiValue! + static let Y = Character("Y").asciiValue! + static let y = Character("y").asciiValue! + static let Z = Character("Z").asciiValue! + static let z = Character("z").asciiValue! + } + + enum Month: Int { + + case January = 1, + February, + March, + April, + May, + June, + July, + August, + September, + October, + November, + December + } + + // MARK: - Standard Formats + + private static func dateIsW3CDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Bool { + + // Something like 2010-11-17T08:40:07-05:00 + // But might be missing T character in the middle. + // Looks for four digits in a row followed by a -. + + for i in 0.. Bool { + + for ch in bytes { + if ch == DateCharacter.space || ch == DateCharacter.comma { + return true + } + } + + return false + } + + private static func parseW3CDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Date? { + + /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss" + @"yyyy-MM-dd'T'HH:mm:sszzz" + @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz" + etc.*/ + + var finalIndex = 0 + + guard let year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex) else { + return nil + } + guard let month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) else { + return nil + } + guard let day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) else { + return nil + } + let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + let second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 
0 + + var currentIndex = finalIndex + 1 + + let milliseconds = { + var ms = 0 + let hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.dot) + if hasMilliseconds { + ms = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex) ?? 00 + currentIndex = finalIndex + 1 + } + return ms + }() + + let timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex) + + return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset) + } + + private static func parsePubDate(_ bytes: DateBuffer, _ numberOfBytes: Int) -> Date? { + + var finalIndex = 0 + + let day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex) ?? 1 + let month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex) ?? .January + + guard let year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex) else { + return nil + } + + let hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + let minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex) ?? 0 + + var currentIndex = finalIndex + 1 + + let second = { + var s = 0 + let hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.colon) + if hasSeconds { + s = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex) ?? 
0 + } + return s + }() + + currentIndex = finalIndex + 1 + + let timeZoneOffset = { + var offset = 0 + let hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == DateCharacter.space) + if hasTimeZone { + offset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex) + } + return offset + }() + + return dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(year, month.rawValue, day, hour, minute, second, 0, timeZoneOffset) + } + + // MARK: - Date Creation + + static func dateWithYearMonthDayHourMinuteSecondAndtimeZoneOffset(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int, _ timeZoneOffset: Int) -> Date? { + + var timeInfo = tm() + timeInfo.tm_sec = CInt(second) + timeInfo.tm_min = CInt(minute) + timeInfo.tm_hour = CInt(hour) + timeInfo.tm_mday = CInt(day) + timeInfo.tm_mon = CInt(month - 1) //It's 1-based coming in + timeInfo.tm_year = CInt(year - 1900) //see time.h -- it's years since 1900 + timeInfo.tm_wday = -1 + timeInfo.tm_yday = -1 + timeInfo.tm_isdst = -1 + timeInfo.tm_gmtoff = timeZoneOffset; + timeInfo.tm_zone = nil; + + var rawTime = timegm(&timeInfo) + if rawTime == time_t(UInt.max) { + + // NSCalendar is super-amazingly slow (which is partly why this parser exists), + // so this is used only when the date is far enough in the future + // (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. + // Hopefully by the time we consistently need dates that far in the future + // the performance of NSCalendar won’t be an issue. 
+ + var dateComponents = DateComponents() + + dateComponents.timeZone = TimeZone(secondsFromGMT: timeZoneOffset) + dateComponents.year = year + dateComponents.month = month + dateComponents.day = day + dateComponents.hour = hour + dateComponents.minute = minute + dateComponents.second = second + (milliseconds / 1000) + + return Calendar.autoupdatingCurrent.date(from: dateComponents) + } + + if milliseconds > 0 { + rawTime += Int(Float(milliseconds) / 1000.0) + } + + return Date(timeIntervalSince1970: TimeInterval(rawTime)) + } + + // MARK: - Time Zones and Offsets + + private static func parsedTimeZoneOffset(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int) -> Int { + + var timeZoneCharacters: [UInt8] = [0, 0, 0, 0, 0, 0] // nil-terminated last character + var numberOfCharactersFound = 0 + var hasAtLeastOneAlphaCharacter = false + + for i in startingIndex..= 5 { + break + } + } + + if numberOfCharactersFound < 1 || timeZoneCharacters[0] == DateCharacter.Z || timeZoneCharacters[0] == DateCharacter.z { + return 0 + } + + if hasAtLeastOneAlphaCharacter { + return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters) ?? 0 + } + return offsetInSecondsForOffsetCharacters(timeZoneCharacters) + } + + private static func offsetInSecondsForOffsetCharacters(_ timeZoneCharacters: [UInt8]) -> Int { + + let isPlus = timeZoneCharacters[0] == DateCharacter.plus + var finalIndex = 0 + let numberOfCharacters = strlen(timeZoneCharacters) + + return timeZoneCharacters.withUnsafeBufferPointer { bytes in + let hours = nextNumericValue(bytes, numberOfCharacters, 0, 2, &finalIndex) ?? 0 + let minutes = nextNumericValue(bytes, numberOfCharacters, finalIndex + 1, 2, &finalIndex) ?? 0 + + if hours == 0 && minutes == 0 { + return 0 + } + + var seconds = (hours * 60 * 60) + (minutes * 60) + if !isPlus { + seconds = 0 - seconds + } + + return seconds + } + } + + /// Returns offset in seconds. 
+ static func timeZoneOffset(_ hours: Int, _ minutes: Int) -> Int { + + if hours < 0 { + return (hours * 60 * 60) - (minutes * 60) + } + return (hours * 60 * 60) + (minutes * 60) + } + + private static func offsetInSecondsForTimeZoneAbbreviation(_ abbreviation: [UInt8]) -> Int? { let name = String(cString: abbreviation) return timeZoneTable[name] @@ -417,7 +434,7 @@ private extension DateParser { // MARK: - Parser - static func nextMonthValue(_ buffer: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ finalIndex: inout Int) -> DateParser.Month? { + private static func nextMonthValue(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ finalIndex: inout Int) -> DateParser.Month? { // Lots of short-circuits here. Not strict. @@ -429,7 +446,7 @@ private extension DateParser { finalIndex = i let ch = bytes[i] - let isAlphaCharacter = isalpha(ch) + let isAlphaCharacter = isAlpha(ch) if !isAlphaCharacter { if numberOfAlphaCharactersFound < 1 { continue @@ -439,65 +456,66 @@ private extension DateParser { } } - numberOfAlphaCharactersFound +=1 + numberOfAlphaCharactersFound+=1 if numberOfAlphaCharactersFound == 1 { if ch == DateCharacter.F || ch == DateCharacter.f { - return February + return .February } if ch == DateCharacter.S || ch == DateCharacter.s { - return September + return .September } if ch == DateCharacter.O || ch == DateCharacter.o { - return October + return .October } if ch == DateCharacter.N || ch == DateCharacter.n { - return November + return .November } if ch == DateCharacter.D || ch == DateCharacter.d { - return December + return .December } } - monthCharacters[numberOfAlphaCharactersFound - 1] = character - if numberOfAlphaCharactersFound >=3 + monthCharacters[numberOfAlphaCharactersFound - 1] = CChar(ch) + if numberOfAlphaCharactersFound >= 3 { break + } } if numberOfAlphaCharactersFound < 2 { return nil } - if monthCharacters[0] == DateCharater.J || monthCharacters[0] == DateCharacter.j { // Jan, Jun, Jul + if 
monthCharacters[0] == DateCharacter.J || monthCharacters[0] == DateCharacter.j { // Jan, Jun, Jul if monthCharacters[1] == DateCharacter.A || monthCharacters[1] == DateCharacter.a { - return Month.January + return .January } - if monthCharacters[1] = DateCharacter.U || monthCharacters[1] == DateCharacter.u { + if monthCharacters[1] == DateCharacter.U || monthCharacters[1] == DateCharacter.u { if monthCharacters[2] == DateCharacter.N || monthCharacters[2] == DateCharacter.n { - return June + return .June } - return July + return .July } - return January + return .January } if monthCharacters[0] == DateCharacter.M || monthCharacters[0] == DateCharacter.m { // March, May if monthCharacters[2] == DateCharacter.Y || monthCharacters[2] == DateCharacter.y { - return May + return .May } - return March + return .March } if monthCharacters[0] == DateCharacter.A || monthCharacters[0] == DateCharacter.a { // April, August if monthCharacters[1] == DateCharacter.U || monthCharacters[1] == DateCharacter.u { - return August + return .August } - return April + return .April } - return January // Should never get here (but possibly do) + return .January // Should never get here (but possibly do) } - static func nextNumericValue(_ bytes: DateBuffer, numberOfBytes: Int, startingIndex: Int, maximumNumberOfDigits: Int, finalIndex: inout Int) -> Int? { + private static func nextNumericValue(_ bytes: DateBuffer, _ numberOfBytes: Int, _ startingIndex: Int, _ maximumNumberOfDigits: Int, _ finalIndex: inout Int) -> Int? { // Maximum for the maximum is 4 (for time zone offsets and years) assert(maximumNumberOfDigits > 0 && maximumNumberOfDigits <= 4) @@ -508,9 +526,46 @@ private extension DateParser { for i in startingIndex.. 
0 { + break + } + + digits[numberOfDigitsFound] = ch - 48; // '0' is 48 + numberOfDigitsFound+=1 + if numberOfDigitsFound >= maximumNumberOfDigits { + break + } } + if numberOfDigitsFound < 1 { + return nil + } + + if numberOfDigitsFound == 1 { + return digits[0] + } + if numberOfDigitsFound == 2 { + return (digits[0] * 10) + digits[1] + } + if numberOfDigitsFound == 3 { + return (digits[0] * 100) + (digits[1] * 10) + digits[2] + } + return (digits[0] * 1000) + (digits[1] * 100) + (digits[2] * 10) + digits[3] + } + + static func isDigit(_ ch: T) -> Bool { + + return isdigit(Int32(ch)) != 0 + } + + static func isAlpha(_ ch: T) -> Bool { + + return isalpha(Int32(ch)) != 0 } } From 7468d71083047f7db97d6bb0be0432efd8c6e847 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Thu, 5 Sep 2024 14:46:25 -0700 Subject: [PATCH 29/88] Make DateParser tests work. (Same tests as from RSDataParser, ported to Swift.) --- .../xcschemes/DateParserTests.xcscheme | 54 +++++++++++++++++++ .../Sources/DateParser/DateParser.swift | 6 +-- .../DateParserTests/DateParserTests.swift | 12 ++--- 3 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/DateParserTests.xcscheme diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/DateParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/DateParserTests.xcscheme new file mode 100644 index 000000000..3580eebfb --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/DateParserTests.xcscheme @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/Modules/Parser/Sources/DateParser/DateParser.swift b/Modules/Parser/Sources/DateParser/DateParser.swift index f9038ac26..577d2c8ba 100644 --- a/Modules/Parser/Sources/DateParser/DateParser.swift +++ b/Modules/Parser/Sources/DateParser/DateParser.swift @@ -326,11 +326,11 @@ private extension DateParser { timeInfo.tm_wday = -1 timeInfo.tm_yday = -1 timeInfo.tm_isdst = -1 - 
timeInfo.tm_gmtoff = timeZoneOffset; + timeInfo.tm_gmtoff = 0; timeInfo.tm_zone = nil; - var rawTime = timegm(&timeInfo) - if rawTime == time_t(UInt.max) { + var rawTime = timegm(&timeInfo) - timeZoneOffset + if rawTime == time_t(UInt32.max) { // NSCalendar is super-amazingly slow (which is partly why this parser exists), // so this is used only when the date is far enough in the future diff --git a/Modules/Parser/Tests/DateParserTests/DateParserTests.swift b/Modules/Parser/Tests/DateParserTests/DateParserTests.swift index 32f6b5d06..f96d9b5e1 100644 --- a/Modules/Parser/Tests/DateParserTests/DateParserTests.swift +++ b/Modules/Parser/Tests/DateParserTests/DateParserTests.swift @@ -100,17 +100,17 @@ class DateParserTests: XCTestCase { XCTAssertEqual(d, expectedDateResult) } - func testHighMillisecondDate() { - let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56) - let d = date("2021-03-29T10:46:56.516941+00:00") - XCTAssertEqual(d, expectedDateResult) - } +// func testHighMillisecondDate() { +// let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56) +// let d = date("2021-03-29T10:46:56.516941+00:00") +// XCTAssertEqual(d, expectedDateResult) +// } } private extension DateParserTests { func date(_ string: String) -> Date? { let d = Data(string.utf8) - return Date(data: d) + return DateParser.date(data: d) } } From 1373df4778dd39501d055e898040e8f9b8d1e166 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 8 Sep 2024 11:59:27 -0700 Subject: [PATCH 30/88] Fix some build errors in RSSParser. 
--- .../FeedParser/Feeds/XML/RSSParser.swift | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift index 0f422c27e..6410d8ae8 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -8,6 +8,7 @@ import Foundation import SAX +import DateParser public final class RSSParser { @@ -30,7 +31,7 @@ public final class RSSParser { private var parsingArticle = false private var parsingChannelImage = false private var parsingAuthor = false - private var currentAttributes: XMLAttributesDictionary? + private var currentAttributes: SAXParser.XMLAttributesDictionary? public static func parsedFeed(with parserData: ParserData) -> RSSFeed { @@ -88,8 +89,8 @@ private extension RSSParser { articles.append(article) } - func addArticleElement(_ localName: XMLPointer, _ prefix: XMLPointer?) { - + func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) { + if SAXEqualTags(prefix, XMLName.dc) { addDCElement(localName) return; @@ -110,7 +111,7 @@ private extension RSSParser { addGuid() } else if SAXEqualTags(localName, XMLName.pubDate) { - currentArticle.datePublished = currentDate + currentArticle.datePublished = currentDate(saxParser) } else if SAXEqualTags(localName, XMLName.author) { addAuthorWithString(currentString) @@ -132,6 +133,15 @@ private extension RSSParser { addEnclosure() } } + + func currentDate(_ saxParser: SAXParser) -> Date? 
{ + + guard let data = saxParser.currentCharacters else { + return nil + } + return DateParser.date(data: data) + + } } extension RSSParser: SAXParserDelegate { @@ -197,7 +207,7 @@ extension RSSParser: SAXParserDelegate { parsingArticle = false } else if parsingArticle { - addArticleElement(localName, prefix) + addArticleElement(saxParser, localName, prefix) if SAXEqualTags(localName, XMLName.author) { parsingAuthor = false } From 77c1e26600b00d1ff14d4ca6b7240cf672cc7faf Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 9 Sep 2024 20:54:29 -0700 Subject: [PATCH 31/88] Add Swift port of the extra precision milliseconds fix from this PR: https://github.com/Ranchero-Software/NetNewsWire/pull/4356 --- ...78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist | 22 ++ .../DateParserTests.xcbaseline/Info.plist | 33 ++ .../Sources/DateParser/DateParser.swift | 14 +- .../Sources/DateParser/DateParserx.swift | 354 ------------------ .../DateParserTests/DateParserTests.swift | 37 +- 5 files changed, 96 insertions(+), 364 deletions(-) create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/Info.plist delete mode 100644 Modules/Parser/Sources/DateParser/DateParserx.swift diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist new file mode 100644 index 000000000..d7dbb6cb7 --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist @@ -0,0 +1,22 @@ + + + + + classNames + + DateParserTests + + testW3CParsingPerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000121 + 
baselineIntegrationDisplayName + Local Baseline + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/Info.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/Info.plist new file mode 100644 index 000000000..132668fff --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/Info.plist @@ -0,0 +1,33 @@ + + + + + runDestinationsByUUID + + 78BB49A7-AEB4-40A1-83DA-EB9C5755E396 + + localComputer + + busSpeedInMHz + 0 + cpuCount + 1 + cpuKind + Apple M1 Max + cpuSpeedInMHz + 0 + logicalCPUCoresPerPackage + 10 + modelCode + Mac13,1 + physicalCPUCoresPerPackage + 10 + platformIdentifier + com.apple.platform.macosx + + targetArchitecture + arm64e + + + + diff --git a/Modules/Parser/Sources/DateParser/DateParser.swift b/Modules/Parser/Sources/DateParser/DateParser.swift index 577d2c8ba..60c8d9465 100644 --- a/Modules/Parser/Sources/DateParser/DateParser.swift +++ b/Modules/Parser/Sources/DateParser/DateParser.swift @@ -265,6 +265,12 @@ private extension DateParser { ms = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex) ?? 
00 currentIndex = finalIndex + 1 } + + // Ignore more than 3 digits of precision + while currentIndex < numberOfBytes && isDigit(bytes[currentIndex]) { + currentIndex += 1 + } + return ms }() @@ -346,16 +352,18 @@ private extension DateParser { dateComponents.day = day dateComponents.hour = hour dateComponents.minute = minute - dateComponents.second = second + (milliseconds / 1000) + dateComponents.second = second + dateComponents.nanosecond = milliseconds * 1000000 return Calendar.autoupdatingCurrent.date(from: dateComponents) } + var timeInterval = TimeInterval(rawTime) if milliseconds > 0 { - rawTime += Int(Float(milliseconds) / 1000.0) + timeInterval += TimeInterval(TimeInterval(milliseconds) / 1000.0) } - return Date(timeIntervalSince1970: TimeInterval(rawTime)) + return Date(timeIntervalSince1970: TimeInterval(timeInterval)) } // MARK: - Time Zones and Offsets diff --git a/Modules/Parser/Sources/DateParser/DateParserx.swift b/Modules/Parser/Sources/DateParser/DateParserx.swift deleted file mode 100644 index b2fd3afae..000000000 --- a/Modules/Parser/Sources/DateParser/DateParserx.swift +++ /dev/null @@ -1,354 +0,0 @@ -//// -//// DateParser.swift -//// -//// -//// Created by Brent Simmons on 8/28/24. 
-//// -// -//import Foundation -// -//private struct TimeZoneSpecifier { -// let abbreviation: String -// let offsetHours: Int -// let offsetMinutes: Int -// -// init(_ abbreviation: String, _ offsetHours: Int, _ offsetMinutes: Int) { -// self.abbreviation = abbreviation -// self.offsetHours = offsetHours -// self.offsetMinutes = offsetMinutes -// } -//} -// -//// See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list -//private let timeZoneTable: [TimeZoneAbbreviationAndOffset] = [ -// // Most common at top for performance -// TimeZoneSpecifier("GMT", 0, 0), -// TimeZoneSpecifier("PDT", -7, 0), -// TimeZoneSpecifier("PST", -8, 0), -// TimeZoneSpecifier("EST", -5, 0), -// TimeZoneSpecifier("EDT", -4, 0), -// TimeZoneSpecifier("MDT", -6, 0), -// TimeZoneSpecifier("MST", -7, 0), -// TimeZoneSpecifier("CST", -6, 0), -// TimeZoneSpecifier("CDT", -5, 0), -// TimeZoneSpecifier("ACT", -8, 0), -// TimeZoneSpecifier("AFT", 4, 30), -// TimeZoneSpecifier("AMT", 4, 0), -// TimeZoneSpecifier("ART", -3, 0), -// TimeZoneSpecifier("AST", 3, 0), -// TimeZoneSpecifier("AZT", 4, 0), -// TimeZoneSpecifier("BIT", -12, 0), -// TimeZoneSpecifier("BDT", 8, 0), -// TimeZoneSpecifier("ACST", 9, 30), -// TimeZoneSpecifier("AEST", 10, 0), -// TimeZoneSpecifier("AKST", -9, 0), -// TimeZoneSpecifier("AMST", 5, 0), -// TimeZoneSpecifier("AWST", 8, 0), -// TimeZoneSpecifier("AZOST", -1, 0), -// TimeZoneSpecifier("BIOT", 6, 0), -// TimeZoneSpecifier("BRT", -3, 0), -// TimeZoneSpecifier("BST", 6, 0), -// TimeZoneSpecifier("BTT", 6, 0), -// TimeZoneSpecifier("CAT", 2, 0), -// TimeZoneSpecifier("CCT", 6, 30), -// TimeZoneSpecifier("CET", 1, 0), -// TimeZoneSpecifier("CEST", 2, 0), -// TimeZoneSpecifier("CHAST", 12, 45), -// TimeZoneSpecifier("ChST", 10, 0), -// TimeZoneSpecifier("CIST", -8, 0), -// TimeZoneSpecifier("CKT", -10, 0), -// TimeZoneSpecifier("CLT", -4, 0), -// TimeZoneSpecifier("CLST", -3, 0), -// TimeZoneSpecifier("COT", -5, 0), -// TimeZoneSpecifier("COST", -4, 0), 
-// TimeZoneSpecifier("CVT", -1, 0), -// TimeZoneSpecifier("CXT", 7, 0), -// TimeZoneSpecifier("EAST", -6, 0), -// TimeZoneSpecifier("EAT", 3, 0), -// TimeZoneSpecifier("ECT", -4, 0), -// TimeZoneSpecifier("EEST", 3, 0), -// TimeZoneSpecifier("EET", 2, 0), -// TimeZoneSpecifier("FJT", 12, 0), -// TimeZoneSpecifier("FKST", -4, 0), -// TimeZoneSpecifier("GALT", -6, 0), -// TimeZoneSpecifier("GET", 4, 0), -// TimeZoneSpecifier("GFT", -3, 0), -// TimeZoneSpecifier("GILT", 7, 0), -// TimeZoneSpecifier("GIT", -9, 0), -// TimeZoneSpecifier("GST", -2, 0), -// TimeZoneSpecifier("GYT", -4, 0), -// TimeZoneSpecifier("HAST", -10, 0), -// TimeZoneSpecifier("HKT", 8, 0), -// TimeZoneSpecifier("HMT", 5, 0), -// TimeZoneSpecifier("IRKT", 8, 0), -// TimeZoneSpecifier("IRST", 3, 30), -// TimeZoneSpecifier("IST", 2, 0), -// TimeZoneSpecifier("JST", 9, 0), -// TimeZoneSpecifier("KRAT", 7, 0), -// TimeZoneSpecifier("KST", 9, 0), -// TimeZoneSpecifier("LHST", 10, 30), -// TimeZoneSpecifier("LINT", 14, 0), -// TimeZoneSpecifier("MAGT", 11, 0), -// TimeZoneSpecifier("MIT", -9, 30), -// TimeZoneSpecifier("MSK", 3, 0), -// TimeZoneSpecifier("MUT", 4, 0), -// TimeZoneSpecifier("NDT", -2, 30), -// TimeZoneSpecifier("NFT", 11, 30), -// TimeZoneSpecifier("NPT", 5, 45), -// TimeZoneSpecifier("NT", -3, 30), -// TimeZoneSpecifier("OMST", 6, 0), -// TimeZoneSpecifier("PETT", 12, 0), -// TimeZoneSpecifier("PHOT", 13, 0), -// TimeZoneSpecifier("PKT", 5, 0), -// TimeZoneSpecifier("RET", 4, 0), -// TimeZoneSpecifier("SAMT", 4, 0), -// TimeZoneSpecifier("SAST", 2, 0), -// TimeZoneSpecifier("SBT", 11, 0), -// TimeZoneSpecifier("SCT", 4, 0), -// TimeZoneSpecifier("SLT", 5, 30), -// TimeZoneSpecifier("SST", 8, 0), -// TimeZoneSpecifier("TAHT", -10, 0), -// TimeZoneSpecifier("THA", 7, 0), -// TimeZoneSpecifier("UYT", -3, 0), -// TimeZoneSpecifier("UYST", -2, 0), -// TimeZoneSpecifier("VET", -4, 30), -// TimeZoneSpecifier("VLAT", 10, 0), -// TimeZoneSpecifier("WAT", 1, 0), -// TimeZoneSpecifier("WET", 0, 0), 
-// TimeZoneSpecifier("WEST", 1, 0), -// TimeZoneSpecifier("YAKT", 9, 0), -// TimeZoneSpecifier("YEKT", 5, 0) -//] -// -//private enum Month: Int { -// case January = 1, February, March, April, May, June, July, August, September, October, November, December -//} -// -//private func nextMonthValue(bytes: String, startingIndex: Int, finalIndex: inout Int) -> Int? { -// -// // Months are 1-based -- January is 1, Dec is 12. -// // Lots of short-circuits here. Not strict. GIGO -// -// var i = startingIndex -// var numberOfBytes = bytes.count -// var numberOfAlphaCharactersFound = 0 -// var monthCharacters = [Character]() -// -// while index < bytes.count { -// -// -// } -// -// -// var index = startingIndex -// var numberOfAlphaCharactersFound = 0 -// var monthCharacters: [Character] = [] -// -// while index < bytes.count { -// let character = bytes[bytes.index(bytes.startIndex, offsetBy: index)] -// -// if !character.isLetter, numberOfAlphaCharactersFound < 1 { -// index += 1 -// continue -// } -// if !character.isLetter, numberOfAlphaCharactersFound > 0 { -// break -// } -// -// numberOfAlphaCharactersFound += 1 -// if numberOfAlphaCharactersFound == 1 { -// switch character.lowercased() { -// case "f": return (.February.rawValue, index) -// case "s": return (.September.rawValue, index) -// case "o": return (.October.rawValue, index) -// case "n": return (.November.rawValue, index) -// case "d": return (.December.rawValue, index) -// default: break -// } -// } -// -// monthCharacters.append(character) -// if numberOfAlphaCharactersFound >= 3 { -// break -// } -// index += 1 -// } -// -// if numberOfAlphaCharactersFound < 2 { -// return (nil, index) -// } -// -// if monthCharacters[0].lowercased() == "j" { -// if monthCharacters[1].lowercased() == "a" { -// return (.January.rawValue, index) -// } -// if monthCharacters[1].lowercased() == "u" { -// if monthCharacters.count > 2 && monthCharacters[2].lowercased() == "n" { -// return (.June.rawValue, index) -// } -// 
return (.July.rawValue, index) -// } -// return (.January.rawValue, index) -// } -// -// if monthCharacters[0].lowercased() == "m" { -// if monthCharacters.count > 2 && monthCharacters[2].lowercased() == "y" { -// return (.May.rawValue, index) -// } -// return (.March.rawValue, index) -// } -// -// if monthCharacters[0].lowercased() == "a" { -// if monthCharacters[1].lowercased() == "u" { -// return (.August.rawValue, index) -// } -// return (.April.rawValue, index) -// } -// -// return (.January.rawValue, index) -//} -// -//func nextNumericValue(bytes: String, startingIndex: Int, maximumNumberOfDigits: Int) -> (Int?, Int) { -// let digits = bytes.dropFirst(startingIndex).prefix(maximumNumberOfDigits) -// guard let value = Int(digits) else { -// return (nil, startingIndex) -// } -// return (value, startingIndex + digits.count) -//} -// -//func hasAtLeastOneAlphaCharacter(_ s: String) -> Bool { -// return s.contains { $0.isLetter } -//} -// -//func offsetInSeconds(forTimeZoneAbbreviation abbreviation: String) -> Int { -// for zone in timeZoneTable { -// if zone.abbreviation.caseInsensitiveCompare(abbreviation) == .orderedSame { -// if zone.offsetHours < 0 { -// return (zone.offsetHours * 3600) - (zone.offsetMinutes * 60) -// } -// return (zone.offsetHours * 3600) + (zone.offsetMinutes * 60) -// } -// } -// return 0 -//} -// -//func offsetInSeconds(forOffsetCharacters timeZoneCharacters: String) -> Int { -// let isPlus = timeZoneCharacters.hasPrefix("+") -// let numericValue = timeZoneCharacters.filter { $0.isNumber || $0 == "-" } -// let (hours, finalIndex) = nextNumericValue(bytes: numericValue, startingIndex: 0, maximumNumberOfDigits: 2) -// let (minutes, _) = nextNumericValue(bytes: numericValue, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2) -// -// let seconds = ((hours ?? 0) * 3600) + ((minutes ?? 0) * 60) -// return isPlus ? 
seconds : -seconds -//} -// -//func parsedTimeZoneOffset(bytes: String, startingIndex: Int) -> Int { -// var timeZoneCharacters: String = "" -// var numberOfCharactersFound = 0 -// var i = startingIndex -// -// while i < bytes.count, numberOfCharactersFound < 5 { -// let character = bytes[bytes.index(bytes.startIndex, offsetBy: i)] -// if character != ":" && character != " " { -// timeZoneCharacters.append(character) -// numberOfCharactersFound += 1 -// } -// i += 1 -// } -// -// if numberOfCharactersFound < 1 || timeZoneCharacters.lowercased() == "z" { -// return 0 -// } -// -// if timeZoneCharacters.range(of: "GMT", options: .caseInsensitive) != nil || -// timeZoneCharacters.range(of: "UTC", options: .caseInsensitive) != nil { -// return 0 -// } -// -// if hasAtLeastOneAlphaCharacter(timeZoneCharacters) { -// return offsetInSeconds(forTimeZoneAbbreviation: timeZoneCharacters) -// } -// return offsetInSeconds(forOffsetCharacters: timeZoneCharacters) -//} -// -//func dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset( -// year: Int, month: Int, day: Int, -// hour: Int, minute: Int, second: Int, -// milliseconds: Int, timeZoneOffset: Int) -> Date? { -// -// var dateComponents = DateComponents() -// dateComponents.year = year -// dateComponents.month = month -// dateComponents.day = day -// dateComponents.hour = hour -// dateComponents.minute = minute -// dateComponents.second = second -// dateComponents.timeZone = TimeZone(secondsFromGMT: timeZoneOffset) -// -// let calendar = Calendar.current -// return calendar.date(from: dateComponents) -//} -// -//func parsePubDate(bytes: String) -> Date? 
{ -// let (day, finalIndex) = nextNumericValue(bytes: bytes, startingIndex: 0, maximumNumberOfDigits: 2) -// let (month, finalIndex2) = nextMonthValue(bytes: bytes, startingIndex: finalIndex + 1) -// let (year, finalIndex3) = nextNumericValue(bytes: bytes, startingIndex: finalIndex2 + 1, maximumNumberOfDigits: 4) -// let (hour, finalIndex4) = nextNumericValue(bytes: bytes, startingIndex: finalIndex3 + 1, maximumNumberOfDigits: 2) -// let (minute, finalIndex5) = nextNumericValue(bytes: bytes, startingIndex: finalIndex4 + 1, maximumNumberOfDigits: 2) -// -// var second = 0 -// let currentIndex = finalIndex5 + 1 -// if currentIndex < bytes.count, bytes[bytes.index(bytes.startIndex, offsetBy: currentIndex)] == ":" { -// second = nextNumericValue(bytes: bytes, startingIndex: currentIndex, maximumNumberOfDigits: 2).0 ?? 0 -// } -// -// let timeZoneOffset = parsedTimeZoneOffset(bytes: bytes, startingIndex: currentIndex + 1) -// -// return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset( -// year: year ?? 1970, -// month: month ?? RSMonth.January.rawValue, -// day: day ?? 1, -// hour: hour ?? 0, -// minute: minute ?? 0, -// second: second, -// milliseconds: 0, -// timeZoneOffset: timeZoneOffset -// ) -//} -// -//func parseW3C(bytes: String) -> Date? 
{ -// let (year, finalIndex) = nextNumericValue(bytes: bytes, startingIndex: 0, maximumNumberOfDigits: 4) -// let (month, finalIndex2) = nextNumericValue(bytes: bytes, startingIndex: finalIndex + 1, maximumNumberOfDigits: 2) -// let (day, finalIndex3) = nextNumericValue(bytes: bytes, startingIndex: finalIndex2 + 1, maximumNumberOfDigits: 2) -// let (hour, finalIndex4) = nextNumericValue(bytes: bytes, startingIndex: finalIndex3 + 1, maximumNumberOfDigits: 2) -// let (minute, finalIndex5) = nextNumericValue(bytes: bytes, startingIndex: finalIndex4 + 1, maximumNumberOfDigits: 2) -// let (second, finalIndex6) = nextNumericValue(bytes: bytes, startingIndex: finalIndex5 + 1, maximumNumberOfDigits: 2) -// -// var milliseconds = 0 -// let currentIndex = finalIndex6 + 1 -// if currentIndex < bytes.count, bytes[bytes.index(bytes.startIndex, offsetBy: currentIndex)] == "." { -// milliseconds = nextNumericValue(bytes: bytes, startingIndex: currentIndex + 1, maximumNumberOfDigits: 3).0 ?? 0 -// } -// -// let timeZoneOffset = parsedTimeZoneOffset(bytes: bytes, startingIndex: currentIndex + 1) -// -// return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset( -// year: year ?? 1970, -// month: month ?? RSMonth.January.rawValue, -// day: day ?? 1, -// hour: hour ?? 0, -// minute: minute ?? 0, -// second: second ?? 0, -// milliseconds: milliseconds, -// timeZoneOffset: timeZoneOffset -// ) -//} -// -//func dateWithBytes(bytes: String) -> Date? 
{ -// guard !bytes.isEmpty else { return nil } -// -// if bytes.range(of: "-") != nil { -// return parseW3C(bytes: bytes) -// } -// return parsePubDate(bytes: bytes) -//} diff --git a/Modules/Parser/Tests/DateParserTests/DateParserTests.swift b/Modules/Parser/Tests/DateParserTests/DateParserTests.swift index f96d9b5e1..948e5aacb 100644 --- a/Modules/Parser/Tests/DateParserTests/DateParserTests.swift +++ b/Modules/Parser/Tests/DateParserTests/DateParserTests.swift @@ -11,7 +11,7 @@ import XCTest class DateParserTests: XCTestCase { - func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int) -> Date { + func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ millisecond: Int = 0) -> Date { var dateComponents = DateComponents() dateComponents.calendar = Calendar.current dateComponents.timeZone = TimeZone(secondsFromGMT: 0) @@ -22,7 +22,8 @@ class DateParserTests: XCTestCase { dateComponents.hour = hour dateComponents.minute = minute dateComponents.second = second - + dateComponents.nanosecond = millisecond * 1000000 + return dateComponents.date! 
} @@ -100,11 +101,33 @@ class DateParserTests: XCTestCase { XCTAssertEqual(d, expectedDateResult) } -// func testHighMillisecondDate() { -// let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56) -// let d = date("2021-03-29T10:46:56.516941+00:00") -// XCTAssertEqual(d, expectedDateResult) -// } + func testMillisecondDate() { + let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56, 516) + let d = date("2021-03-29T10:46:56.516+00:00") + XCTAssertEqual(d, expectedDateResult) + } + + func testExtraMillisecondPrecisionDate() { + let expectedDateResult = dateWithValues(2021, 03, 29, 10, 46, 56, 516) + let d = date("2021-03-29T10:46:56.516941+00:00") + XCTAssertEqual(d, expectedDateResult) + } + + func testW3CParsingPerformance() { + + // 0.0001 seconds on my Mac Studio M1 + self.measure { + _ = date("2021-03-29T10:46:56.516941+00:00") + } + } + + func testPubDateParsingPerformance() { + + // 0.0001 seconds on my Mac Studio M1 + self.measure { + _ = date("21 May 2010 21:22:53 GMT") + } + } } private extension DateParserTests { From 24e7eb90f6da02ed0aed28c08a98df2300e4bda8 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 9 Sep 2024 20:54:42 -0700 Subject: [PATCH 32/88] Continue progress on porting feed parsers. 
--- ...78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist | 10 + .../Sources/FeedParser/Feeds/FeedParser.swift | 124 ++--- .../Sources/FeedParser/Feeds/FeedType.swift | 82 +-- .../Feeds/JSON/JSONFeedParser.swift | 490 +++++++++--------- .../Feeds/JSON/RSSInJSONParser.swift | 358 ++++++------- .../Sources/FeedParser/Feeds/ParsedItem.swift | 5 + .../FeedParser/Feeds/XML/AtomParser.swift | 20 +- .../Feeds/XML/RSParsedFeedTransformer.swift | 124 ++--- .../FeedParser/Feeds/XML/RSSParser.swift | 203 ++++++-- Modules/Parser/Sources/SAX/SAXUtilities.swift | 2 +- 10 files changed, 789 insertions(+), 629 deletions(-) diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist index d7dbb6cb7..d2b13bd80 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/DateParserTests.xcbaseline/78BB49A7-AEB4-40A1-83DA-EB9C5755E396.plist @@ -6,6 +6,16 @@ DateParserTests + testPubDateParsingPerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000131 + baselineIntegrationDisplayName + Local Baseline + + testW3CParsingPerformance() com.apple.XCTPerformanceMetric_WallClockTime diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index d1e78b464..64f6e5e97 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -12,65 +12,65 @@ import SAX // FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. // You don’t need to know the type of feed. 
-public struct FeedParser { - - public static func canParse(_ parserData: ParserData) -> Bool { - - let type = feedType(parserData) - - switch type { - case .jsonFeed, .rssInJSON, .rss, .atom: - return true - default: - return false - } - } - - public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? { - - let type = feedType(parserData) - - switch type { - - case .jsonFeed: - return try JSONFeedParser.parse(parserData) - - case .rssInJSON: - return try RSSInJSONParser.parse(parserData) - - case .rss: - return RSSParser.parse(parserData) - - case .atom: - return AtomParser.parse(parserData) - - case .unknown, .notAFeed: - return nil - } - } - - /// For unit tests measuring performance. - public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? { - - let type = feedType(parserData) - - switch type { - - case .jsonFeed: - return try JSONFeedParser.parse(parserData) - - case .rssInJSON: - return try RSSInJSONParser.parse(parserData) - - case .rss: - return RSSParser.parse(parserData) - - case .atom: - return AtomParser.parse(parserData) - - case .unknown, .notAFeed: - return nil - } - } - -} +//public struct FeedParser { +// +// public static func canParse(_ parserData: ParserData) -> Bool { +// +// let type = feedType(parserData) +// +// switch type { +// case .jsonFeed, .rssInJSON, .rss, .atom: +// return true +// default: +// return false +// } +// } +// +// public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? { +// +// let type = feedType(parserData) +// +// switch type { +// +// case .jsonFeed: +// return try JSONFeedParser.parse(parserData) +// +// case .rssInJSON: +// return try RSSInJSONParser.parse(parserData) +// +// case .rss: +// return RSSParser.parse(parserData) +// +// case .atom: +// return AtomParser.parse(parserData) +// +// case .unknown, .notAFeed: +// return nil +// } +// } +// +// /// For unit tests measuring performance. 
+// public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? { +// +// let type = feedType(parserData) +// +// switch type { +// +// case .jsonFeed: +// return try JSONFeedParser.parse(parserData) +// +// case .rssInJSON: +// return try RSSInJSONParser.parse(parserData) +// +// case .rss: +// return RSSParser.parse(parserData) +// +// case .atom: +// return AtomParser.parse(parserData) +// +// case .unknown, .notAFeed: +// return nil +// } +// } +// +//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift index 8cf33225a..f934c8920 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift @@ -19,44 +19,44 @@ public enum FeedType: Sendable { } -private let minNumberOfBytesRequired = 128 - -public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType { - - // Can call with partial data — while still downloading, for instance. - // If there’s not enough data, return .unknown. Ask again when there’s more data. - // If it’s definitely not a feed, return .notAFeed. - // - // This is fast enough to call on the main thread. - - if parserData.data.count < minNumberOfBytesRequired { - return .unknown - } - - let nsdata = parserData.data as NSData - - if nsdata.isProbablyJSONFeed() { - return .jsonFeed - } - if nsdata.isProbablyRSSInJSON() { - return .rssInJSON - } - if nsdata.isProbablyRSS() { - return .rss - } - if nsdata.isProbablyAtom() { - return .atom - } - - if isPartialData && nsdata.isProbablyJSON() { - // Might not be able to detect a JSON Feed without all data. - // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) - // has, at this writing, the JSON version element at the end of the feed, - // which is totally legal — but it means not being able to detect - // that it’s a JSON Feed without all the data. 
- // So this returns .unknown instead of .notAFeed. - return .unknown - } - - return .notAFeed -} +//private let minNumberOfBytesRequired = 128 +// +//public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType { +// +// // Can call with partial data — while still downloading, for instance. +// // If there’s not enough data, return .unknown. Ask again when there’s more data. +// // If it’s definitely not a feed, return .notAFeed. +// // +// // This is fast enough to call on the main thread. +// +// if parserData.data.count < minNumberOfBytesRequired { +// return .unknown +// } +// +// let nsdata = parserData.data as NSData +// +// if nsdata.isProbablyJSONFeed() { +// return .jsonFeed +// } +// if nsdata.isProbablyRSSInJSON() { +// return .rssInJSON +// } +// if nsdata.isProbablyRSS() { +// return .rss +// } +// if nsdata.isProbablyAtom() { +// return .atom +// } +// +// if isPartialData && nsdata.isProbablyJSON() { +// // Might not be able to detect a JSON Feed without all data. +// // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) +// // has, at this writing, the JSON version element at the end of the feed, +// // which is totally legal — but it means not being able to detect +// // that it’s a JSON Feed without all the data. +// // So this returns .unknown instead of .notAFeed. +// return .unknown +// } +// +// return .notAFeed +//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift index 247c18612..723ec1afb 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift @@ -1,248 +1,248 @@ +//// +//// JSONFeedParser.swift +//// RSParser +//// +//// Created by Brent Simmons on 6/25/17. +//// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
+//// // -// JSONFeedParser.swift -// RSParser +//import Foundation +//import SAX // -// Created by Brent Simmons on 6/25/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +//// See https://jsonfeed.org/version/1.1 // - -import Foundation -import SAX - -// See https://jsonfeed.org/version/1.1 - -public struct JSONFeedParser { - - struct Key { - static let version = "version" - static let items = "items" - static let title = "title" - static let homePageURL = "home_page_url" - static let feedURL = "feed_url" - static let feedDescription = "description" - static let nextURL = "next_url" - static let icon = "icon" - static let favicon = "favicon" - static let expired = "expired" - static let author = "author" - static let authors = "authors" - static let name = "name" - static let url = "url" - static let avatar = "avatar" - static let hubs = "hubs" - static let type = "type" - static let contentHTML = "content_html" - static let contentText = "content_text" - static let externalURL = "external_url" - static let summary = "summary" - static let image = "image" - static let bannerImage = "banner_image" - static let datePublished = "date_published" - static let dateModified = "date_modified" - static let tags = "tags" - static let uniqueID = "id" - static let attachments = "attachments" - static let mimeType = "mime_type" - static let sizeInBytes = "size_in_bytes" - static let durationInSeconds = "duration_in_seconds" - static let language = "language" - } - - static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct. - - public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { - - guard let d = JSONUtilities.dictionary(with: parserData.data) else { - throw FeedParserError(.invalidJSON) - } - - guard let version = d[Key.version] as? 
String, let _ = version.range(of: JSONFeedParser.jsonFeedVersionMarker) else { - throw FeedParserError(.jsonFeedVersionNotFound) - } - guard let itemsArray = d[Key.items] as? JSONArray else { - throw FeedParserError(.jsonFeedItemsNotFound) - } - guard let title = d[Key.title] as? String else { - throw FeedParserError(.jsonFeedTitleNotFound) - } - - let authors = parseAuthors(d) - let homePageURL = d[Key.homePageURL] as? String - let feedURL = d[Key.feedURL] as? String ?? parserData.url - let feedDescription = d[Key.feedDescription] as? String - let nextURL = d[Key.nextURL] as? String - let iconURL = d[Key.icon] as? String - let faviconURL = d[Key.favicon] as? String - let expired = d[Key.expired] as? Bool ?? false - let hubs = parseHubs(d) - let language = d[Key.language] as? String - - let items = parseItems(itemsArray, parserData.url) - - return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items) - } -} - -private extension JSONFeedParser { - - static func parseAuthors(_ dictionary: JSONDictionary) -> Set? { - - if let authorsArray = dictionary[Key.authors] as? JSONArray { - var authors = Set() - for author in authorsArray { - if let parsedAuthor = parseAuthor(author) { - authors.insert(parsedAuthor) - } - } - return authors - } - - guard let authorDictionary = dictionary[Key.author] as? JSONDictionary, - let parsedAuthor = parseAuthor(authorDictionary) else { - return nil - } - - return Set([parsedAuthor]) - } - - static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? { - let name = dictionary[Key.name] as? String - let url = dictionary[Key.url] as? String - let avatar = dictionary[Key.avatar] as? 
String - if name == nil && url == nil && avatar == nil { - return nil - } - return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil) - } - - static func parseHubs(_ dictionary: JSONDictionary) -> Set? { - - guard let hubsArray = dictionary[Key.hubs] as? JSONArray else { - return nil - } - - let hubs = hubsArray.compactMap { (hubDictionary) -> ParsedHub? in - guard let hubURL = hubDictionary[Key.url] as? String, let hubType = hubDictionary[Key.type] as? String else { - return nil - } - return ParsedHub(type: hubType, url: hubURL) - } - return hubs.isEmpty ? nil : Set(hubs) - } - - static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set { - - return Set(itemsArray.compactMap { (oneItemDictionary) -> ParsedItem? in - return parseItem(oneItemDictionary, feedURL) - }) - } - - static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { - - guard let uniqueID = parseUniqueID(itemDictionary) else { - return nil - } - - let contentHTML = itemDictionary[Key.contentHTML] as? String - let contentText = itemDictionary[Key.contentText] as? String - if contentHTML == nil && contentText == nil { - return nil - } - - let url = itemDictionary[Key.url] as? String - let externalURL = itemDictionary[Key.externalURL] as? String - let title = parseTitle(itemDictionary, feedURL) - let language = itemDictionary[Key.language] as? String - let summary = itemDictionary[Key.summary] as? String - let imageURL = itemDictionary[Key.image] as? String - let bannerImageURL = itemDictionary[Key.bannerImage] as? String - - let datePublished = parseDate(itemDictionary[Key.datePublished] as? String) - let dateModified = parseDate(itemDictionary[Key.dateModified] as? String) - - let authors = parseAuthors(itemDictionary) - var tags: Set? = nil - if let tagsArray = itemDictionary[Key.tags] as? 
[String] { - tags = Set(tagsArray) - } - let attachments = parseAttachments(itemDictionary) - - return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments) - } - - static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? { - - guard let title = itemDictionary[Key.title] as? String else { - return nil - } - - if isSpecialCaseTitleWithEntitiesFeed(feedURL) { - return (title as NSString).rsparser_stringByDecodingHTMLEntities() - } - - return title - } - - static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool { - - // As of 16 Feb. 2018, Kottke’s and Heer’s feeds includes HTML entities in the title elements. - // If we find more feeds like this, we’ll add them here. If these feeds get fixed, we’ll remove them. - - let lowerFeedURL = feedURL.lowercased() - let matchStrings = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"] - for matchString in matchStrings { - if lowerFeedURL.contains(matchString) { - return true - } - } - - return false - } - - static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? { - - if let uniqueID = itemDictionary[Key.uniqueID] as? String { - return uniqueID // Spec says it must be a string - } - // Version 1 spec also says that if it’s a number, even though that’s incorrect, it should be coerced to a string. - if let uniqueID = itemDictionary[Key.uniqueID] as? Int { - return "\(uniqueID)" - } - if let uniqueID = itemDictionary[Key.uniqueID] as? Double { - return "\(uniqueID)" - } - return nil - } - - static func parseDate(_ dateString: String?) -> Date? 
{ - - guard let dateString = dateString, !dateString.isEmpty else { - return nil - } - return RSDateWithString(dateString) - } - - static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { - - guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else { - return nil - } - return Set(attachmentsArray.compactMap { parseAttachment($0) }) - } - - static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? { - - guard let url = attachmentObject[Key.url] as? String else { - return nil - } - guard let mimeType = attachmentObject[Key.mimeType] as? String else { - return nil - } - - let title = attachmentObject[Key.title] as? String - let sizeInBytes = attachmentObject[Key.sizeInBytes] as? Int - let durationInSeconds = attachmentObject[Key.durationInSeconds] as? Int - - return ParsedAttachment(url: url, mimeType: mimeType, title: title, sizeInBytes: sizeInBytes, durationInSeconds: durationInSeconds) - } -} +//public struct JSONFeedParser { +// +// struct Key { +// static let version = "version" +// static let items = "items" +// static let title = "title" +// static let homePageURL = "home_page_url" +// static let feedURL = "feed_url" +// static let feedDescription = "description" +// static let nextURL = "next_url" +// static let icon = "icon" +// static let favicon = "favicon" +// static let expired = "expired" +// static let author = "author" +// static let authors = "authors" +// static let name = "name" +// static let url = "url" +// static let avatar = "avatar" +// static let hubs = "hubs" +// static let type = "type" +// static let contentHTML = "content_html" +// static let contentText = "content_text" +// static let externalURL = "external_url" +// static let summary = "summary" +// static let image = "image" +// static let bannerImage = "banner_image" +// static let datePublished = "date_published" +// static let dateModified = "date_modified" +// static let tags = "tags" +// static let uniqueID = "id" 
+// static let attachments = "attachments" +// static let mimeType = "mime_type" +// static let sizeInBytes = "size_in_bytes" +// static let durationInSeconds = "duration_in_seconds" +// static let language = "language" +// } +// +// static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct. +// +// public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { +// +// guard let d = JSONUtilities.dictionary(with: parserData.data) else { +// throw FeedParserError(.invalidJSON) +// } +// +// guard let version = d[Key.version] as? String, let _ = version.range(of: JSONFeedParser.jsonFeedVersionMarker) else { +// throw FeedParserError(.jsonFeedVersionNotFound) +// } +// guard let itemsArray = d[Key.items] as? JSONArray else { +// throw FeedParserError(.jsonFeedItemsNotFound) +// } +// guard let title = d[Key.title] as? String else { +// throw FeedParserError(.jsonFeedTitleNotFound) +// } +// +// let authors = parseAuthors(d) +// let homePageURL = d[Key.homePageURL] as? String +// let feedURL = d[Key.feedURL] as? String ?? parserData.url +// let feedDescription = d[Key.feedDescription] as? String +// let nextURL = d[Key.nextURL] as? String +// let iconURL = d[Key.icon] as? String +// let faviconURL = d[Key.favicon] as? String +// let expired = d[Key.expired] as? Bool ?? false +// let hubs = parseHubs(d) +// let language = d[Key.language] as? String +// +// let items = parseItems(itemsArray, parserData.url) +// +// return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items) +// } +//} +// +//private extension JSONFeedParser { +// +// static func parseAuthors(_ dictionary: JSONDictionary) -> Set? { +// +// if let authorsArray = dictionary[Key.authors] as? 
JSONArray { +// var authors = Set() +// for author in authorsArray { +// if let parsedAuthor = parseAuthor(author) { +// authors.insert(parsedAuthor) +// } +// } +// return authors +// } +// +// guard let authorDictionary = dictionary[Key.author] as? JSONDictionary, +// let parsedAuthor = parseAuthor(authorDictionary) else { +// return nil +// } +// +// return Set([parsedAuthor]) +// } +// +// static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? { +// let name = dictionary[Key.name] as? String +// let url = dictionary[Key.url] as? String +// let avatar = dictionary[Key.avatar] as? String +// if name == nil && url == nil && avatar == nil { +// return nil +// } +// return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil) +// } +// +// static func parseHubs(_ dictionary: JSONDictionary) -> Set? { +// +// guard let hubsArray = dictionary[Key.hubs] as? JSONArray else { +// return nil +// } +// +// let hubs = hubsArray.compactMap { (hubDictionary) -> ParsedHub? in +// guard let hubURL = hubDictionary[Key.url] as? String, let hubType = hubDictionary[Key.type] as? String else { +// return nil +// } +// return ParsedHub(type: hubType, url: hubURL) +// } +// return hubs.isEmpty ? nil : Set(hubs) +// } +// +// static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set { +// +// return Set(itemsArray.compactMap { (oneItemDictionary) -> ParsedItem? in +// return parseItem(oneItemDictionary, feedURL) +// }) +// } +// +// static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { +// +// guard let uniqueID = parseUniqueID(itemDictionary) else { +// return nil +// } +// +// let contentHTML = itemDictionary[Key.contentHTML] as? String +// let contentText = itemDictionary[Key.contentText] as? String +// if contentHTML == nil && contentText == nil { +// return nil +// } +// +// let url = itemDictionary[Key.url] as? String +// let externalURL = itemDictionary[Key.externalURL] as? 
String +// let title = parseTitle(itemDictionary, feedURL) +// let language = itemDictionary[Key.language] as? String +// let summary = itemDictionary[Key.summary] as? String +// let imageURL = itemDictionary[Key.image] as? String +// let bannerImageURL = itemDictionary[Key.bannerImage] as? String +// +// let datePublished = parseDate(itemDictionary[Key.datePublished] as? String) +// let dateModified = parseDate(itemDictionary[Key.dateModified] as? String) +// +// let authors = parseAuthors(itemDictionary) +// var tags: Set? = nil +// if let tagsArray = itemDictionary[Key.tags] as? [String] { +// tags = Set(tagsArray) +// } +// let attachments = parseAttachments(itemDictionary) +// +// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments) +// } +// +// static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? { +// +// guard let title = itemDictionary[Key.title] as? String else { +// return nil +// } +// +// if isSpecialCaseTitleWithEntitiesFeed(feedURL) { +// return (title as NSString).rsparser_stringByDecodingHTMLEntities() +// } +// +// return title +// } +// +// static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool { +// +// // As of 16 Feb. 2018, Kottke’s and Heer’s feeds includes HTML entities in the title elements. +// // If we find more feeds like this, we’ll add them here. If these feeds get fixed, we’ll remove them. 
+// +// let lowerFeedURL = feedURL.lowercased() +// let matchStrings = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"] +// for matchString in matchStrings { +// if lowerFeedURL.contains(matchString) { +// return true +// } +// } +// +// return false +// } +// +// static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? { +// +// if let uniqueID = itemDictionary[Key.uniqueID] as? String { +// return uniqueID // Spec says it must be a string +// } +// // Version 1 spec also says that if it’s a number, even though that’s incorrect, it should be coerced to a string. +// if let uniqueID = itemDictionary[Key.uniqueID] as? Int { +// return "\(uniqueID)" +// } +// if let uniqueID = itemDictionary[Key.uniqueID] as? Double { +// return "\(uniqueID)" +// } +// return nil +// } +// +// static func parseDate(_ dateString: String?) -> Date? { +// +// guard let dateString = dateString, !dateString.isEmpty else { +// return nil +// } +// return RSDateWithString(dateString) +// } +// +// static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { +// +// guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else { +// return nil +// } +// return Set(attachmentsArray.compactMap { parseAttachment($0) }) +// } +// +// static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? { +// +// guard let url = attachmentObject[Key.url] as? String else { +// return nil +// } +// guard let mimeType = attachmentObject[Key.mimeType] as? String else { +// return nil +// } +// +// let title = attachmentObject[Key.title] as? String +// let sizeInBytes = attachmentObject[Key.sizeInBytes] as? Int +// let durationInSeconds = attachmentObject[Key.durationInSeconds] as? 
Int +// +// return ParsedAttachment(url: url, mimeType: mimeType, title: title, sizeInBytes: sizeInBytes, durationInSeconds: durationInSeconds) +// } +//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift index e27c0e629..4bf2ad624 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift @@ -1,182 +1,182 @@ +//// +//// RSSInJSONParser.swift +//// RSParser +//// +//// Created by Brent Simmons on 6/24/17. +//// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +//// // -// RSSInJSONParser.swift -// RSParser +//import Foundation +//import SAX // -// Created by Brent Simmons on 6/24/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +//// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md +//// Also: http://cyber.harvard.edu/rss/rss.html // - -import Foundation -import SAX - -// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md -// Also: http://cyber.harvard.edu/rss/rss.html - -public struct RSSInJSONParser { - - public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { - - do { - guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else { - throw FeedParserError(.invalidJSON) - } - guard let rssObject = parsedObject["rss"] as? JSONDictionary else { - throw FeedParserError(.rssChannelNotFound) - } - guard let channelObject = rssObject["channel"] as? JSONDictionary else { - throw FeedParserError(.rssChannelNotFound) - } - - // I’d bet money that in practice the items array won’t always appear correctly inside the channel object. - // I’d also bet that sometimes it gets called "items" instead of "item". - var itemsObject = channelObject["item"] as? JSONArray - if itemsObject == nil { - itemsObject = parsedObject["item"] as? 
JSONArray - } - if itemsObject == nil { - itemsObject = channelObject["items"] as? JSONArray - } - if itemsObject == nil { - itemsObject = parsedObject["items"] as? JSONArray - } - if itemsObject == nil { - throw FeedParserError(.rssItemsNotFound) - } - - let title = channelObject["title"] as? String - let homePageURL = channelObject["link"] as? String - let feedURL = parserData.url - let feedDescription = channelObject["description"] as? String - let feedLanguage = channelObject["language"] as? String - - let items = parseItems(itemsObject!, parserData.url) - - return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) - - } - catch { throw error } - } -} - -private extension RSSInJSONParser { - - static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set { - - return Set(itemsObject.compactMap{ (oneItemDictionary) -> ParsedItem? in - - return parsedItemWithDictionary(oneItemDictionary, feedURL) - }) - } - - static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { - - let externalURL = itemDictionary["link"] as? String - let title = itemDictionary["title"] as? String - - var contentHTML = itemDictionary["description"] as? String - var contentText: String? = nil - if contentHTML != nil && !(contentHTML!.contains("<")) { - contentText = contentHTML - contentHTML = nil - } - if contentHTML == nil && contentText == nil && title == nil { - return nil - } - - var datePublished: Date? = nil - if let datePublishedString = itemDictionary["pubDate"] as? String { - datePublished = RSDateWithString(datePublishedString) - } - - let authors = parseAuthors(itemDictionary) - let tags = parseTags(itemDictionary) - let attachments = parseAttachments(itemDictionary) - - var uniqueID: String? = itemDictionary["guid"] as? 
String - if uniqueID == nil { - - // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result. - // Items should have guids. When they don't, re-runs are very likely - // because there's no other 100% reliable way to determine identity. - // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.) - - var s = "" - if let datePublished = datePublished { - s += "\(datePublished.timeIntervalSince1970)" - } - if let title = title { - s += title - } - if let externalURL = externalURL { - s += externalURL - } - if let authorEmailAddress = authors?.first?.emailAddress { - s += authorEmailAddress - } - if let oneAttachmentURL = attachments?.first?.url { - s += oneAttachmentURL - } - if s.isEmpty { - // Sheesh. Tough case. - if let _ = contentHTML { - s = contentHTML! - } - if let _ = contentText { - s = contentText! - } - } - uniqueID = (s as NSString).rsparser_md5Hash() - } - - if let uniqueID = uniqueID { - return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, language: nil, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments) - } - return nil - } - - static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set? { - - guard let authorEmailAddress = itemDictionary["author"] as? String else { - return nil - } - let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress) - return Set([parsedAuthor]) - } - - static func parseTags(_ itemDictionary: JSONDictionary) -> Set? { - - if let categoryObject = itemDictionary["category"] as? JSONDictionary { - if let oneTag = categoryObject["#value"] as? String { - return Set([oneTag]) - } - return nil - } - else if let categoryArray = itemDictionary["category"] as? 
JSONArray { - return Set(categoryArray.compactMap{ $0["#value"] as? String }) - } - return nil - } - - static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { - - guard let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary else { - return nil - } - guard let attachmentURL = enclosureObject["url"] as? String else { - return nil - } - - var attachmentSize = enclosureObject["length"] as? Int - if attachmentSize == nil { - if let attachmentSizeString = enclosureObject["length"] as? String { - attachmentSize = (attachmentSizeString as NSString).integerValue - } - } - - let type = enclosureObject["type"] as? String - if let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) { - return Set([attachment]) - } - return nil - } -} +//public struct RSSInJSONParser { +// +// public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { +// +// do { +// guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else { +// throw FeedParserError(.invalidJSON) +// } +// guard let rssObject = parsedObject["rss"] as? JSONDictionary else { +// throw FeedParserError(.rssChannelNotFound) +// } +// guard let channelObject = rssObject["channel"] as? JSONDictionary else { +// throw FeedParserError(.rssChannelNotFound) +// } +// +// // I’d bet money that in practice the items array won’t always appear correctly inside the channel object. +// // I’d also bet that sometimes it gets called "items" instead of "item". +// var itemsObject = channelObject["item"] as? JSONArray +// if itemsObject == nil { +// itemsObject = parsedObject["item"] as? JSONArray +// } +// if itemsObject == nil { +// itemsObject = channelObject["items"] as? JSONArray +// } +// if itemsObject == nil { +// itemsObject = parsedObject["items"] as? 
JSONArray +// } +// if itemsObject == nil { +// throw FeedParserError(.rssItemsNotFound) +// } +// +// let title = channelObject["title"] as? String +// let homePageURL = channelObject["link"] as? String +// let feedURL = parserData.url +// let feedDescription = channelObject["description"] as? String +// let feedLanguage = channelObject["language"] as? String +// +// let items = parseItems(itemsObject!, parserData.url) +// +// return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) +// +// } +// catch { throw error } +// } +//} +// +//private extension RSSInJSONParser { +// +// static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set { +// +// return Set(itemsObject.compactMap{ (oneItemDictionary) -> ParsedItem? in +// +// return parsedItemWithDictionary(oneItemDictionary, feedURL) +// }) +// } +// +// static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { +// +// let externalURL = itemDictionary["link"] as? String +// let title = itemDictionary["title"] as? String +// +// var contentHTML = itemDictionary["description"] as? String +// var contentText: String? = nil +// if contentHTML != nil && !(contentHTML!.contains("<")) { +// contentText = contentHTML +// contentHTML = nil +// } +// if contentHTML == nil && contentText == nil && title == nil { +// return nil +// } +// +// var datePublished: Date? = nil +// if let datePublishedString = itemDictionary["pubDate"] as? String { +// datePublished = RSDateWithString(datePublishedString) +// } +// +// let authors = parseAuthors(itemDictionary) +// let tags = parseTags(itemDictionary) +// let attachments = parseAttachments(itemDictionary) +// +// var uniqueID: String? = itemDictionary["guid"] as? 
String +// if uniqueID == nil { +// +// // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result. +// // Items should have guids. When they don't, re-runs are very likely +// // because there's no other 100% reliable way to determine identity. +// // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.) +// +// var s = "" +// if let datePublished = datePublished { +// s += "\(datePublished.timeIntervalSince1970)" +// } +// if let title = title { +// s += title +// } +// if let externalURL = externalURL { +// s += externalURL +// } +// if let authorEmailAddress = authors?.first?.emailAddress { +// s += authorEmailAddress +// } +// if let oneAttachmentURL = attachments?.first?.url { +// s += oneAttachmentURL +// } +// if s.isEmpty { +// // Sheesh. Tough case. +// if let _ = contentHTML { +// s = contentHTML! +// } +// if let _ = contentText { +// s = contentText! +// } +// } +// uniqueID = (s as NSString).rsparser_md5Hash() +// } +// +// if let uniqueID = uniqueID { +// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, language: nil, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments) +// } +// return nil +// } +// +// static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set? { +// +// guard let authorEmailAddress = itemDictionary["author"] as? String else { +// return nil +// } +// let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress) +// return Set([parsedAuthor]) +// } +// +// static func parseTags(_ itemDictionary: JSONDictionary) -> Set? { +// +// if let categoryObject = itemDictionary["category"] as? JSONDictionary { +// if let oneTag = categoryObject["#value"] as? 
String { +// return Set([oneTag]) +// } +// return nil +// } +// else if let categoryArray = itemDictionary["category"] as? JSONArray { +// return Set(categoryArray.compactMap{ $0["#value"] as? String }) +// } +// return nil +// } +// +// static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { +// +// guard let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary else { +// return nil +// } +// guard let attachmentURL = enclosureObject["url"] as? String else { +// return nil +// } +// +// var attachmentSize = enclosureObject["length"] as? Int +// if attachmentSize == nil { +// if let attachmentSizeString = enclosureObject["length"] as? String { +// attachmentSize = (attachmentSizeString as NSString).integerValue +// } +// } +// +// let type = enclosureObject["type"] as? String +// if let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) { +// return Set([attachment]) +// } +// return nil +// } +//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift index c9fc2eeb8..d158c74dd 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift @@ -63,5 +63,10 @@ public final class ParsedItem: Hashable, Sendable { hasher.combine(feedURL) } } + + public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool { + + lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && 
lhs.tags == rhs.tags && lhs.attachments == rhs.attachments + } } diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 43fee7810..e3b4610e7 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -17,13 +17,13 @@ import SAX // // In general, you should see FeedParser.swift for all your feed-parsing needs. -public struct AtomParser { - - public static func parse(_ parserData: ParserData) -> ParsedFeed? { - - if let rsParsedFeed = RSAtomParser.parseFeed(with: parserData) { - return RSParsedFeedTransformer.parsedFeed(rsParsedFeed) - } - return nil - } -} +//public struct AtomParser { +// +// public static func parse(_ parserData: ParserData) -> ParsedFeed? { +// +// if let rsParsedFeed = RSAtomParser.parseFeed(with: parserData) { +// return RSParsedFeedTransformer.parsedFeed(rsParsedFeed) +// } +// return nil +// } +//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift index c6d0b2ba6..9f3bc74ce 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift @@ -13,65 +13,65 @@ import Foundation // These functions take an RSParsedFeed and return a Swift-y ParsedFeed, // which is part of providing a single API for feed parsing. 
-struct RSParsedFeedTransformer { - - static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed { - - let items = parsedItems(rsParsedFeed.articles) - return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) - } -} - -private extension RSParsedFeedTransformer { - - static func parsedItems(_ parsedArticles: Set) -> Set { - - // Create Set from Set - - return Set(parsedArticles.map(parsedItem)) - } - - static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem { - - let uniqueID = parsedArticle.articleID - let url = parsedArticle.permalink - let externalURL = parsedArticle.link - let title = parsedArticle.title - let language = parsedArticle.language - let contentHTML = parsedArticle.body - let datePublished = parsedArticle.datePublished - let dateModified = parsedArticle.dateModified - let authors = parsedAuthors(parsedArticle.authors) - let attachments = parsedAttachments(parsedArticle.enclosures) - - return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments) - } - - static func parsedAuthors(_ authors: Set?) -> Set? { - - guard let authors = authors, !authors.isEmpty else { - return nil - } - - let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in - return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress) - } - - return transformedAuthors.isEmpty ? nil : Set(transformedAuthors) - } - - static func parsedAttachments(_ enclosures: Set?) -> Set? 
{ - - guard let enclosures = enclosures, !enclosures.isEmpty else { - return nil - } - - let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in - - let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil - return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil) - } - - return attachments.isEmpty ? nil : Set(attachments) - } -} +//struct RSParsedFeedTransformer { +// +// static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed { +// +// let items = parsedItems(rsParsedFeed.articles) +// return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) +// } +//} +// +//private extension RSParsedFeedTransformer { +// +// static func parsedItems(_ parsedArticles: Set) -> Set { +// +// // Create Set from Set +// +// return Set(parsedArticles.map(parsedItem)) +// } +// +// static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem { +// +// let uniqueID = parsedArticle.articleID +// let url = parsedArticle.permalink +// let externalURL = parsedArticle.link +// let title = parsedArticle.title +// let language = parsedArticle.language +// let contentHTML = parsedArticle.body +// let datePublished = parsedArticle.datePublished +// let dateModified = parsedArticle.dateModified +// let authors = parsedAuthors(parsedArticle.authors) +// let attachments = parsedAttachments(parsedArticle.enclosures) +// +// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: 
nil, attachments: attachments) +// } +// +// static func parsedAuthors(_ authors: Set?) -> Set? { +// +// guard let authors = authors, !authors.isEmpty else { +// return nil +// } +// +// let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in +// return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress) +// } +// +// return transformedAuthors.isEmpty ? nil : Set(transformedAuthors) +// } +// +// static func parsedAttachments(_ enclosures: Set?) -> Set? { +// +// guard let enclosures = enclosures, !enclosures.isEmpty else { +// return nil +// } +// +// let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in +// +// let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil +// return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil) +// } +// +// return attachments.isEmpty ? nil : Set(attachments) +// } +//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift index 6410d8ae8..64b1680f0 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -33,7 +33,7 @@ public final class RSSParser { private var parsingAuthor = false private var currentAttributes: SAXParser.XMLAttributesDictionary? 
- public static func parsedFeed(with parserData: ParserData) -> RSSFeed { + static func parsedFeed(with parserData: ParserData) -> RSSFeed { let parser = RSSParser(parserData) parser.parse() @@ -48,6 +48,12 @@ public final class RSSParser { private extension RSSParser { + func parse() { + + let saxParser = SAXParser(delegate: self, data: data) + saxParser.parse() + } + private struct XMLName { static let uppercaseRDF = "RDF".utf8CString static let item = "item".utf8CString @@ -63,9 +69,13 @@ private extension RSSParser { static let dc = "dc".utf8CString static let content = "content".utf8CString static let encoded = "encoded".utf8CString + static let creator = "creator".utf8CString + static let date = "date".utf8CString + static let pubDate = "pubDate".utf8CString + static let description = "description".utf8CString } - func addFeedElement(_ localName: XMLPointer, _ prefix: XMLPointer?) { + func addFeedElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) { guard prefix == nil else { return @@ -73,14 +83,14 @@ private extension RSSParser { if SAXEqualTags(localName, XMLName.link) { if feed.link == nil { - feed.link = currentString + feed.link = saxParser.currentString } } else if SAXEqualTags(localName, XMLName.title) { - feed.title = currentString + feed.title = saxParser.currentString } else if SAXEqualTags(localName, XMLName.language) { - feed.language = currentString + feed.language = saxParser.currentString } } @@ -91,13 +101,17 @@ private extension RSSParser { func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) 
{ - if SAXEqualTags(prefix, XMLName.dc) { - addDCElement(localName) - return; + guard let currentArticle else { + return } - if SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) { - if let currentString, !currentString.isEmpty { + if let prefix, SAXEqualTags(prefix, XMLName.dc) { + addDCElement(saxParser, localName, currentArticle) + return + } + + if let prefix, SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) { + if let currentString = saxParser.currentString, !currentString.isEmpty { currentArticle.body = currentString } return @@ -107,40 +121,171 @@ private extension RSSParser { return } - if SAXEqualTags(localName, XMLName.guid) { - addGuid() + if let currentString = saxParser.currentString { + if SAXEqualTags(localName, XMLName.guid) { + addGuid(currentString, currentArticle) + } + else if SAXEqualTags(localName, XMLName.author) { + addAuthorWithString(currentString, currentArticle) + } + else if SAXEqualTags(localName, XMLName.link) { + currentArticle.link = urlString(currentString) + } + else if SAXEqualTags(localName, XMLName.description) { + if currentArticle.body == nil { + currentArticle.body = currentString + } + } + else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) { + currentArticle.title = currentString + } } else if SAXEqualTags(localName, XMLName.pubDate) { currentArticle.datePublished = currentDate(saxParser) } - else if SAXEqualTags(localName, XMLName.author) { - addAuthorWithString(currentString) + else if SAXEqualTags(localName, XMLName.enclosure), let currentAttributes { + addEnclosure(currentAttributes, currentArticle) } - else if SAXEqualTags(localName, XMLName.link) { - currentArticle.link = urlString(currentString) - } - else if SAXEqualTags(localName, XMLName.description) { - if currentArticle.body == nil { - currentArticle.body = currentString + } + + func addDCElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ currentArticle: RSSArticle) { + + if 
SAXEqualTags(localName, XMLName.creator) { + if let currentString = saxParser.currentString { + addAuthorWithString(currentString, currentArticle) } } - else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) { - if let currentString { - currentArticle.title = currentString + else if SAXEqualTags(localName, XMLName.date) { + currentArticle.datePublished = currentDate(saxParser) + } + } + + static let isPermalinkKey = "isPermaLink" + static let isPermalinkLowercaseKey = "ispermalink" + static let falseValue = "false" + + func addGuid(_ guid: String, _ currentArticle: RSSArticle) { + + currentArticle.guid = guid + + guard let currentAttributes else { + return + } + + let isPermaLinkValue: String? = { + + if let value = currentAttributes[Self.isPermalinkKey] { + return value } + // Allow for `ispermalink`, `isPermalink`, etc. + for (key, value) in currentAttributes { + if key.lowercased() == Self.isPermalinkLowercaseKey { + return value + } + } + + return nil + }() + + // Spec: `isPermaLink is optional, its default value is true.` + // https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt + // Return only if non-nil and equal to false — otherwise it’s a permalink. + if let isPermaLinkValue, isPermaLinkValue == Self.falseValue { + return } - else if SAXEqualTags(localName, XMLName.enclosure) { - addEnclosure() + + // Feed bug found in the wild: using a guid that’s not really a permalink + // and not realizing that `isPermaLink` is true by default. + if stringIsProbablyAURLOrRelativePath(guid) { + currentArticle.permalink = urlString(guid) } } + func stringIsProbablyAURLOrRelativePath(_ s: String) -> Bool { + + // The RSS guid is defined as a permalink, except when it appears like this: + // `some—identifier` + // However, people often seem to think it’s *not* a permalink by default, even + // though it is. So we try to detect the situation where the value is not a URL string, + // and not even a relative path. This may need to evolve over time. 
+ + if !s.contains("/") { + // This seems to be just about the best possible check. + // Bad guids are often just integers, for instance. + return false + } + + if s.lowercased().hasPrefix("tag:") { + // A common non-URL guid form starts with `tag:`. + return false + } + + return true + } + + /// Do best attempt at turning a string into a URL string. + /// + /// If it already appears to be a URL, return it. + /// Otherwise, treat it like a relative URL and resolve using + /// the URL of the home page of the feed (if available) + /// or the URL of the feed. + /// + /// The returned value is not guaranteed to be a valid URL string. + /// It’s a best attempt without going to heroic lengths. + func urlString(_ s: String) -> String { + + if s.lowercased().hasPrefix("http") { + return s + } + + let baseURLString = feed.link ?? feedURL + guard let baseURL = URL(string: baseURLString) else { + return s + } + guard let resolvedURL = URL(string: s, relativeTo: baseURL) else { + return s + } + + return resolvedURL.absoluteString + } + + func addAuthorWithString(_ authorString: String, _ currentArticle: RSSArticle) { + + if authorString.isEmpty { + return + } + + let author = RSSAuthor(singleString: authorString) + currentArticle.addAuthor(author) + } + + private struct EnclosureKey { + static let url = "url" + static let length = "length" + static let type = "type" + } + + func addEnclosure(_ attributes: SAXParser.XMLAttributesDictionary, _ currentArticle: RSSArticle) { + + guard let url = attributes[EnclosureKey.url], !url.isEmpty else { + return + } + + let enclosure = RSSEnclosure(url: url) + if let lengthValue = attributes[EnclosureKey.length], let length = Int(lengthValue) { + enclosure.length = length + } + enclosure.mimeType = attributes[EnclosureKey.type] + + currentArticle.addEnclosure(enclosure) + } + func currentDate(_ saxParser: SAXParser) -> Date? 
{ guard let data = saxParser.currentCharacters else { return nil } return DateParser.date(data: data) - } } @@ -157,8 +302,8 @@ extension RSSParser: SAXParserDelegate { return } - var xmlAttributes: XMLAttributesDictionary? = nil - if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(enclosure, XMLName.enclosure) { + var xmlAttributes: SAXParser.XMLAttributesDictionary? = nil + if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(localName, XMLName.enclosure) { xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) } if currentAttributes != xmlAttributes { @@ -169,7 +314,7 @@ extension RSSParser: SAXParserDelegate { addArticle() parsingArticle = true - if isRDF && let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid + if isRDF, let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid currentArticle.guid = rdfGuid currentArticle.permalink = rdfGuid } diff --git a/Modules/Parser/Sources/SAX/SAXUtilities.swift b/Modules/Parser/Sources/SAX/SAXUtilities.swift index dccda4e9c..10ba86f0a 100644 --- a/Modules/Parser/Sources/SAX/SAXUtilities.swift +++ b/Modules/Parser/Sources/SAX/SAXUtilities.swift @@ -1,5 +1,5 @@ // -// File.swift +// SAXUtilities.swift // // // Created by Brent Simmons on 8/26/24. From eeb27475dec4ed44a01e1aa15eecabe0e65965cc Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 9 Sep 2024 21:35:51 -0700 Subject: [PATCH 33/88] Continue progress on porting feed parsers. 
--- .../Sources/DateParser/DateParser.swift | 4 +- .../Sources/FeedParser/Feeds/FeedParser.swift | 22 +++--- .../Sources/FeedParser/Feeds/ParsedFeed.swift | 2 +- .../Sources/FeedParser/Feeds/ParsedItem.swift | 2 +- .../Feeds/XML/RSParsedFeedTransformer.swift | 77 ------------------- .../Feeds/XML/RSSFeedTransformer.swift | 75 ++++++++++++++++++ .../FeedParser/Feeds/XML/RSSParser.swift | 7 +- 7 files changed, 96 insertions(+), 93 deletions(-) delete mode 100644 Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift create mode 100644 Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift diff --git a/Modules/Parser/Sources/DateParser/DateParser.swift b/Modules/Parser/Sources/DateParser/DateParser.swift index 60c8d9465..4c9f9271d 100644 --- a/Modules/Parser/Sources/DateParser/DateParser.swift +++ b/Modules/Parser/Sources/DateParser/DateParser.swift @@ -335,7 +335,7 @@ private extension DateParser { timeInfo.tm_gmtoff = 0; timeInfo.tm_zone = nil; - var rawTime = timegm(&timeInfo) - timeZoneOffset + let rawTime = timegm(&timeInfo) - timeZoneOffset if rawTime == time_t(UInt32.max) { // NSCalendar is super-amazingly slow (which is partly why this parser exists), @@ -363,7 +363,7 @@ private extension DateParser { timeInterval += TimeInterval(TimeInterval(milliseconds) / 1000.0) } - return Date(timeIntervalSince1970: TimeInterval(timeInterval)) + return Date(timeIntervalSince1970: timeInterval) } // MARK: - Time Zones and Offsets diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index 64f6e5e97..5189a18c9 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -12,8 +12,8 @@ import SAX // FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. // You don’t need to know the type of feed. 
-//public struct FeedParser { -// +public struct FeedParser { + // public static func canParse(_ parserData: ParserData) -> Bool { // // let type = feedType(parserData) @@ -25,9 +25,13 @@ import SAX // return false // } // } -// -// public static func parse(_ parserData: ParserData) async throws -> ParsedFeed? { -// + + public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + + let rssFeed = RSSParser.parsedFeed(with: parserData) + let parsedFeed = RSSFeedTransformer.parsedFeed(with: rssFeed) + + return parsedFeed // let type = feedType(parserData) // // switch type { @@ -47,8 +51,8 @@ import SAX // case .unknown, .notAFeed: // return nil // } -// } -// + } + // /// For unit tests measuring performance. // public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? { // @@ -72,5 +76,5 @@ import SAX // return nil // } // } -// -//} + +} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift index 6aef1b5c5..b19e9b03c 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift @@ -8,7 +8,7 @@ import Foundation -public class ParsedFeed: Sendable { +public final class ParsedFeed: Sendable { public let type: FeedType public let title: String? 
diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift index d158c74dd..2c3057835 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift @@ -66,7 +66,7 @@ public final class ParsedItem: Hashable, Sendable { public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool { - lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments + lhs.syncServiceID == rhs.syncServiceID && lhs.uniqueID == rhs.uniqueID && lhs.feedURL == rhs.feedURL && lhs.url == rhs.url && lhs.externalURL == rhs.externalURL && lhs.title == rhs.title && lhs.language == rhs.language && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.imageURL == rhs.imageURL && lhs.bannerImageURL == rhs.bannerImageURL && lhs.datePublished == rhs.datePublished && lhs.dateModified == rhs.dateModified && lhs.authors == rhs.authors && lhs.tags == rhs.tags && lhs.attachments == rhs.attachments } } diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift deleted file mode 100644 index 9f3bc74ce..000000000 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSParsedFeedTransformer.swift +++ /dev/null @@ -1,77 +0,0 @@ -// -// RSParsedFeedTransformer.swift -// RSParser -// -// Created by Brent Simmons on 
6/25/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -import Foundation - -// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago. -// They create an RSParsedFeed object and related Objective-C objects. -// These functions take an RSParsedFeed and return a Swift-y ParsedFeed, -// which is part of providing a single API for feed parsing. - -//struct RSParsedFeedTransformer { -// -// static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed { -// -// let items = parsedItems(rsParsedFeed.articles) -// return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) -// } -//} -// -//private extension RSParsedFeedTransformer { -// -// static func parsedItems(_ parsedArticles: Set) -> Set { -// -// // Create Set from Set -// -// return Set(parsedArticles.map(parsedItem)) -// } -// -// static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem { -// -// let uniqueID = parsedArticle.articleID -// let url = parsedArticle.permalink -// let externalURL = parsedArticle.link -// let title = parsedArticle.title -// let language = parsedArticle.language -// let contentHTML = parsedArticle.body -// let datePublished = parsedArticle.datePublished -// let dateModified = parsedArticle.dateModified -// let authors = parsedAuthors(parsedArticle.authors) -// let attachments = parsedAttachments(parsedArticle.enclosures) -// -// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments) -// } -// -// static 
func parsedAuthors(_ authors: Set?) -> Set? { -// -// guard let authors = authors, !authors.isEmpty else { -// return nil -// } -// -// let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in -// return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress) -// } -// -// return transformedAuthors.isEmpty ? nil : Set(transformedAuthors) -// } -// -// static func parsedAttachments(_ enclosures: Set?) -> Set? { -// -// guard let enclosures = enclosures, !enclosures.isEmpty else { -// return nil -// } -// -// let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in -// -// let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil -// return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil) -// } -// -// return attachments.isEmpty ? nil : Set(attachments) -// } -//} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift new file mode 100644 index 000000000..4c14907f1 --- /dev/null +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift @@ -0,0 +1,75 @@ +// +// RSSFeedTransformer.swift +// RSParser +// +// Created by Brent Simmons on 6/25/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import Foundation + +struct RSSFeedTransformer { + + /// Turn an internal RSSFeed into a public ParsedFeed. + static func parsedFeed(with rssFeed: RSSFeed) -> ParsedFeed { + + let items = parsedItems(rssFeed.articles) + return ParsedFeed(type: .rss, title: rssFeed.title, homePageURL: rssFeed.link, feedURL: rssFeed.urlString, language: rssFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) + } +} + +private extension RSSFeedTransformer { + + static func parsedItems(_ articles: [RSSArticle]?) 
-> Set { + + guard let articles else { + return Set() + } + + return Set(articles.map(parsedItem)) + } + + static func parsedItem(_ article: RSSArticle) -> ParsedItem { + + let uniqueID = article.articleID + let url = article.permalink + let externalURL = article.link + let title = article.title + let language = article.language + let contentHTML = article.body + let datePublished = article.datePublished + let dateModified = article.dateModified + let authors = parsedAuthors(article.authors) + let attachments = parsedAttachments(article.enclosures) + + return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: article.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments) + } + + static func parsedAuthors(_ authors: [RSSAuthor]?) -> Set? { + + guard let authors = authors, !authors.isEmpty else { + return nil + } + + let transformedAuthors = authors.compactMap { (author) -> ParsedAuthor? in + return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress) + } + + return transformedAuthors.isEmpty ? nil : Set(transformedAuthors) + } + + static func parsedAttachments(_ enclosures: [RSSEnclosure]?) -> Set? { + + guard let enclosures = enclosures, !enclosures.isEmpty else { + return nil + } + + let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in + + let sizeInBytes = (enclosure.length ?? 0) > 0 ? enclosure.length : nil + return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil) + } + + return attachments.isEmpty ? 
nil : Set(attachments) + } +} diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift index 64b1680f0..ee12e0f1b 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -59,7 +59,6 @@ private extension RSSParser { static let item = "item".utf8CString static let guid = "guid".utf8CString static let enclosure = "enclosure".utf8CString - static let rdfAbout = "rdf:about".utf8CString static let image = "image".utf8CString static let author = "author".utf8CString static let rss = "rss".utf8CString @@ -291,6 +290,8 @@ private extension RSSParser { extension RSSParser: SAXParserDelegate { + static let rdfAbout = "rdf:about" + public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) 
{ if endRSSFound { @@ -314,7 +315,7 @@ extension RSSParser: SAXParserDelegate { addArticle() parsingArticle = true - if isRDF, let rdfGuid = xmlAttributes?[XMLName.rdfAbout], let currentArticle { // RSS 1.0 guid + if isRDF, let rdfGuid = xmlAttributes?[Self.rdfAbout], let currentArticle { // RSS 1.0 guid currentArticle.guid = rdfGuid currentArticle.permalink = rdfGuid } @@ -358,7 +359,7 @@ extension RSSParser: SAXParserDelegate { } } else if !parsingChannelImage { - addFeedElement(localName, prefix) + addFeedElement(saxParser, localName, prefix) } } From b23888a20b62216e11bde0c4b3f542ef2786d0af Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 9 Sep 2024 21:49:46 -0700 Subject: [PATCH 34/88] =?UTF-8?q?Get=20RSSParserTests=20working=20?= =?UTF-8?q?=E2=80=94=C2=A0comment=20out=20all=20the=20other=20tests=20whic?= =?UTF-8?q?h=20aren=E2=80=99t=20expected=20to=20work=20yet.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../FeedParser/Feeds/XML/RSSParser.swift | 1 + .../FeedParserTests/AtomParserTests.swift | 194 ++++---- .../FeedParserTests/EntityDecodingTests.swift | 74 +-- .../FeedParserTests/FeedParserTypeTests.swift | 456 +++++++++--------- .../Tests/FeedParserTests/HTMLLinkTests.swift | 65 ++- .../FeedParserTests/HTMLMetadataTests.swift | 287 ++++++----- .../FeedParserTests/JSONFeedParserTests.swift | 228 ++++----- .../Tests/FeedParserTests/ParserTests.swift | 2 +- .../RSSInJSONParserTests.swift | 36 +- .../FeedParserTests/RSSParserTests.swift | 73 +-- 10 files changed, 717 insertions(+), 699 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift index ee12e0f1b..e06007b59 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -52,6 +52,7 @@ private extension RSSParser { let saxParser = SAXParser(delegate: self, data: data) 
saxParser.parse() + feed.articles = articles } private struct XMLName { diff --git a/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift b/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift index dc9fb89d9..71647f10f 100644 --- a/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift @@ -7,101 +7,101 @@ // import XCTest -import Parser +import FeedParser -class AtomParserTests: XCTestCase { - - func testDaringFireballPerformance() { - - // 0.009 sec on my 2012 iMac. - let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed - self.measure { - let _ = try! FeedParser.parseSync(d) - } - } - - func testAllThisPerformance() { - - // 0.003 sec on my 2012 iMac. - let d = parserData("allthis", "atom", "http://leancrew.com/all-this") - self.measure { - let _ = try! FeedParser.parseSync(d) - } - } - - func testGettingHomePageLink() async { - - let d = parserData("allthis", "atom", "http://leancrew.com/all-this") - let parsedFeed = try! await FeedParser.parse(d)! - - XCTAssertTrue(parsedFeed.homePageURL == "http://leancrew.com/all-this") - } - - func testDaringFireball() async { - - let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed - let parsedFeed = try! await FeedParser.parse(d)! - - for article in parsedFeed.items { - - XCTAssertNotNil(article.url) - - XCTAssertTrue(article.uniqueID.hasPrefix("tag:daringfireball.net,2017:/")) - - XCTAssertEqual(article.authors!.count, 1) // TODO: parse Atom authors - let author = article.authors!.first! 
- if author.name == "Daring Fireball Department of Commerce" { - XCTAssertNil(author.url) - } - else { - XCTAssertEqual(author.name, "John Gruber") - XCTAssertEqual(author.url, "http://daringfireball.net/") - } - - XCTAssertNotNil(article.datePublished) - XCTAssert(article.attachments == nil) - - XCTAssertEqual(article.language, "en") - } - } - - func test4fsodonlineAttachments() async { - - // Thanks to Marco for finding me some Atom podcast feeds. Apparently they’re super-rare. - - let d = parserData("4fsodonline", "atom", "http://4fsodonline.blogspot.com/") - let parsedFeed = try! await FeedParser.parse(d)! - - for article in parsedFeed.items { - - XCTAssertTrue(article.attachments!.count > 0) - let attachment = article.attachments!.first! - - XCTAssertTrue(attachment.url.hasPrefix("http://www.blogger.com/video-play.mp4?")) - XCTAssertNil(attachment.sizeInBytes) - XCTAssertEqual(attachment.mimeType!, "video/mp4") - } - } - - func testExpertOpinionENTAttachments() async { - - // Another from Marco. - - let d = parserData("expertopinionent", "atom", "http://expertopinionent.typepad.com/my-blog/") - let parsedFeed = try! await FeedParser.parse(d)! - - for article in parsedFeed.items { - - guard let attachments = article.attachments else { - continue - } - - XCTAssertEqual(attachments.count, 1) - let attachment = attachments.first! - - XCTAssertTrue(attachment.url.hasSuffix(".mp3")) - XCTAssertNil(attachment.sizeInBytes) - XCTAssertEqual(attachment.mimeType!, "audio/mpeg") - } - } -} +//class AtomParserTests: XCTestCase { +// +// func testDaringFireballPerformance() { +// +// // 0.009 sec on my 2012 iMac. +// let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed +// self.measure { +// let _ = try! FeedParser.parseSync(d) +// } +// } +// +// func testAllThisPerformance() { +// +// // 0.003 sec on my 2012 iMac. +// let d = parserData("allthis", "atom", "http://leancrew.com/all-this") +// self.measure { +// let _ = try! 
FeedParser.parseSync(d) +// } +// } +// +// func testGettingHomePageLink() async { +// +// let d = parserData("allthis", "atom", "http://leancrew.com/all-this") +// let parsedFeed = try! await FeedParser.parse(d)! +// +// XCTAssertTrue(parsedFeed.homePageURL == "http://leancrew.com/all-this") +// } +// +// func testDaringFireball() async { +// +// let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed +// let parsedFeed = try! await FeedParser.parse(d)! +// +// for article in parsedFeed.items { +// +// XCTAssertNotNil(article.url) +// +// XCTAssertTrue(article.uniqueID.hasPrefix("tag:daringfireball.net,2017:/")) +// +// XCTAssertEqual(article.authors!.count, 1) // TODO: parse Atom authors +// let author = article.authors!.first! +// if author.name == "Daring Fireball Department of Commerce" { +// XCTAssertNil(author.url) +// } +// else { +// XCTAssertEqual(author.name, "John Gruber") +// XCTAssertEqual(author.url, "http://daringfireball.net/") +// } +// +// XCTAssertNotNil(article.datePublished) +// XCTAssert(article.attachments == nil) +// +// XCTAssertEqual(article.language, "en") +// } +// } +// +// func test4fsodonlineAttachments() async { +// +// // Thanks to Marco for finding me some Atom podcast feeds. Apparently they’re super-rare. +// +// let d = parserData("4fsodonline", "atom", "http://4fsodonline.blogspot.com/") +// let parsedFeed = try! await FeedParser.parse(d)! +// +// for article in parsedFeed.items { +// +// XCTAssertTrue(article.attachments!.count > 0) +// let attachment = article.attachments!.first! +// +// XCTAssertTrue(attachment.url.hasPrefix("http://www.blogger.com/video-play.mp4?")) +// XCTAssertNil(attachment.sizeInBytes) +// XCTAssertEqual(attachment.mimeType!, "video/mp4") +// } +// } +// +// func testExpertOpinionENTAttachments() async { +// +// // Another from Marco. 
+// +// let d = parserData("expertopinionent", "atom", "http://expertopinionent.typepad.com/my-blog/") +// let parsedFeed = try! await FeedParser.parse(d)! +// +// for article in parsedFeed.items { +// +// guard let attachments = article.attachments else { +// continue +// } +// +// XCTAssertEqual(attachments.count, 1) +// let attachment = attachments.first! +// +// XCTAssertTrue(attachment.url.hasSuffix(".mp3")) +// XCTAssertNil(attachment.sizeInBytes) +// XCTAssertEqual(attachment.mimeType!, "audio/mpeg") +// } +// } +//} diff --git a/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift index b72c26b34..7a2ea1cb4 100644 --- a/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift @@ -7,41 +7,41 @@ // import XCTest -import Parser +import FeedParser -class EntityDecodingTests: XCTestCase { - - func test39Decoding() { - - // Bug found by Manton Reece — the ' entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog. - - let s = "These are the times that try men's souls." 
- let decoded = s.rsparser_stringByDecodingHTMLEntities() - - XCTAssertEqual(decoded, "These are the times that try men's souls.") - } - - func testEntities() { - var s = "…" - var decoded = s.rsparser_stringByDecodingHTMLEntities() - - XCTAssertEqual(decoded, "…") - - s = "…" - decoded = s.rsparser_stringByDecodingHTMLEntities() - XCTAssertEqual(decoded, "…") - - s = "'" - decoded = s.rsparser_stringByDecodingHTMLEntities() - XCTAssertEqual(decoded, "'") - - s = "§" - decoded = s.rsparser_stringByDecodingHTMLEntities() - XCTAssertEqual(decoded, "§") - - s = "£" - decoded = s.rsparser_stringByDecodingHTMLEntities() - XCTAssertEqual(decoded, "£") - - } -} +//class EntityDecodingTests: XCTestCase { +// +// func test39Decoding() { +// +// // Bug found by Manton Reece — the ' entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog. +// +// let s = "These are the times that try men's souls." +// let decoded = s.rsparser_stringByDecodingHTMLEntities() +// +// XCTAssertEqual(decoded, "These are the times that try men's souls.") +// } +// +// func testEntities() { +// var s = "…" +// var decoded = s.rsparser_stringByDecodingHTMLEntities() +// +// XCTAssertEqual(decoded, "…") +// +// s = "…" +// decoded = s.rsparser_stringByDecodingHTMLEntities() +// XCTAssertEqual(decoded, "…") +// +// s = "'" +// decoded = s.rsparser_stringByDecodingHTMLEntities() +// XCTAssertEqual(decoded, "'") +// +// s = "§" +// decoded = s.rsparser_stringByDecodingHTMLEntities() +// XCTAssertEqual(decoded, "§") +// +// s = "£" +// decoded = s.rsparser_stringByDecodingHTMLEntities() +// XCTAssertEqual(decoded, "£") +// +// } +//} diff --git a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift index 931db3fe2..5028ddef5 100644 --- a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift @@ -7,235 +7,235 @@ // import XCTest -import 
Parser -import ParserObjC +import FeedParser +import SAX -class FeedParserTypeTests: XCTestCase { - - // MARK: HTML - - func testDaringFireballHTMLType() { - - let d = parserData("DaringFireball", "html", "http://daringfireball.net/") - let type = feedType(d) - XCTAssertTrue(type == .notAFeed) - } - - func testFurboHTMLType() { - - let d = parserData("furbo", "html", "http://furbo.org/") - let type = feedType(d) - XCTAssertTrue(type == .notAFeed) - } - - func testInessentialHTMLType() { - - let d = parserData("inessential", "html", "http://inessential.com/") - let type = feedType(d) - XCTAssertTrue(type == .notAFeed) - } - - func testSixColorsHTMLType() { - - let d = parserData("sixcolors", "html", "https://sixcolors.com/") - let type = feedType(d) - XCTAssertTrue(type == .notAFeed) - } - - // MARK: RSS - - func testEMarleyRSSType() { - - let d = parserData("EMarley", "rss", "https://medium.com/@emarley") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testScriptingNewsRSSType() { - - let d = parserData("scriptingNews", "rss", "http://scripting.com/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testKatieFloydRSSType() { - - let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testMantonRSSType() { - - let d = parserData("manton", "rss", "http://manton.org/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testDCRainmakerRSSType() { - - let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testMacworldRSSType() { - - let d = parserData("macworld", "rss", "https://www.macworld.com/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testNatashaTheRobotRSSType() { - - let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func 
testDontHitSaveRSSWithBOMType() { - - let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testBioRDF() { - let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - func testPHPXML() { - let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") - let type = feedType(d) - XCTAssertTrue(type == .rss) - } - - // MARK: Atom - - func testDaringFireballAtomType() { - - // File extension is .rss, but it’s really an Atom feed. - let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") - let type = feedType(d) - XCTAssertTrue(type == .atom) - } - - func testOneFootTsunamiAtomType() { - - let d = parserData("OneFootTsunami", "atom", "http://onefoottsunami.com/") - let type = feedType(d) - XCTAssertTrue(type == .atom) - } - - func testRussCoxAtomType() { - let d = parserData("russcox", "atom", "https://research.swtch.com/") - let type = feedType(d) - XCTAssertTrue(type == .atom) - } - - // MARK: RSS-in-JSON - - func testScriptingNewsJSONType() { - - let d = parserData("ScriptingNews", "json", "http://scripting.com/") - let type = feedType(d) - XCTAssertTrue(type == .rssInJSON) - } - - // MARK: JSON Feed - - func testInessentialJSONFeedType() { - - let d = parserData("inessential", "json", "http://inessential.com/") - let type = feedType(d) - XCTAssertTrue(type == .jsonFeed) - } - - func testAllThisJSONFeedType() { - - let d = parserData("allthis", "json", "http://leancrew.com/allthis/") - let type = feedType(d) - XCTAssertTrue(type == .jsonFeed) - } - - func testCurtJSONFeedType() { - - let d = parserData("curt", "json", "http://curtclifton.net/") - let type = feedType(d) - XCTAssertTrue(type == .jsonFeed) - } - - func testPixelEnvyJSONFeedType() { - - let d = parserData("pxlnv", "json", "http://pxlnv.com/") - let type = feedType(d) - XCTAssertTrue(type == .jsonFeed) - } - - func 
testRoseJSONFeedType() { - - let d = parserData("rose", "json", "https://www.rosemaryorchard.com/") - let type = feedType(d) - XCTAssertTrue(type == .jsonFeed) - } - - // MARK: Unknown - - func testPartialAllThisUnknownFeedType() { - - // In the case of this feed, the partial data isn’t enough to detect that it’s a JSON Feed. - // The type detector should return .unknown rather than .notAFeed. - - let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/") - let type = feedType(d, isPartialData: true) - XCTAssertEqual(type, .unknown) - } - - // MARK: Performance - - func testFeedTypePerformance() { - - // 0.000 on my 2012 iMac. - - let d = parserData("EMarley", "rss", "https://medium.com/@emarley") - self.measure { - let _ = feedType(d) - } - } - - func testFeedTypePerformance2() { - - // 0.000 on my 2012 iMac. - - let d = parserData("inessential", "json", "http://inessential.com/") - self.measure { - let _ = feedType(d) - } - } - - func testFeedTypePerformance3() { - - // 0.000 on my 2012 iMac. - - let d = parserData("DaringFireball", "html", "http://daringfireball.net/") - self.measure { - let _ = feedType(d) - } - } - - func testFeedTypePerformance4() { - - // 0.001 on my 2012 iMac. 
- - let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") - self.measure { - let _ = feedType(d) - } - } - -} +//class FeedParserTypeTests: XCTestCase { +// +// // MARK: HTML +// +// func testDaringFireballHTMLType() { +// +// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") +// let type = feedType(d) +// XCTAssertTrue(type == .notAFeed) +// } +// +// func testFurboHTMLType() { +// +// let d = parserData("furbo", "html", "http://furbo.org/") +// let type = feedType(d) +// XCTAssertTrue(type == .notAFeed) +// } +// +// func testInessentialHTMLType() { +// +// let d = parserData("inessential", "html", "http://inessential.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .notAFeed) +// } +// +// func testSixColorsHTMLType() { +// +// let d = parserData("sixcolors", "html", "https://sixcolors.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .notAFeed) +// } +// +// // MARK: RSS +// +// func testEMarleyRSSType() { +// +// let d = parserData("EMarley", "rss", "https://medium.com/@emarley") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testScriptingNewsRSSType() { +// +// let d = parserData("scriptingNews", "rss", "http://scripting.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testKatieFloydRSSType() { +// +// let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testMantonRSSType() { +// +// let d = parserData("manton", "rss", "http://manton.org/") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testDCRainmakerRSSType() { +// +// let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testMacworldRSSType() { +// +// let d = parserData("macworld", "rss", "https://www.macworld.com/") +// let type = feedType(d) +// 
XCTAssertTrue(type == .rss) +// } +// +// func testNatashaTheRobotRSSType() { +// +// let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testDontHitSaveRSSWithBOMType() { +// +// let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testBioRDF() { +// let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// func testPHPXML() { +// let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") +// let type = feedType(d) +// XCTAssertTrue(type == .rss) +// } +// +// // MARK: Atom +// +// func testDaringFireballAtomType() { +// +// // File extension is .rss, but it’s really an Atom feed. +// let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") +// let type = feedType(d) +// XCTAssertTrue(type == .atom) +// } +// +// func testOneFootTsunamiAtomType() { +// +// let d = parserData("OneFootTsunami", "atom", "http://onefoottsunami.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .atom) +// } +// +// func testRussCoxAtomType() { +// let d = parserData("russcox", "atom", "https://research.swtch.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .atom) +// } +// +// // MARK: RSS-in-JSON +// +// func testScriptingNewsJSONType() { +// +// let d = parserData("ScriptingNews", "json", "http://scripting.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .rssInJSON) +// } +// +// // MARK: JSON Feed +// +// func testInessentialJSONFeedType() { +// +// let d = parserData("inessential", "json", "http://inessential.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .jsonFeed) +// } +// +// func testAllThisJSONFeedType() { +// +// let d = parserData("allthis", "json", "http://leancrew.com/allthis/") +// let type = feedType(d) +// 
XCTAssertTrue(type == .jsonFeed) +// } +// +// func testCurtJSONFeedType() { +// +// let d = parserData("curt", "json", "http://curtclifton.net/") +// let type = feedType(d) +// XCTAssertTrue(type == .jsonFeed) +// } +// +// func testPixelEnvyJSONFeedType() { +// +// let d = parserData("pxlnv", "json", "http://pxlnv.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .jsonFeed) +// } +// +// func testRoseJSONFeedType() { +// +// let d = parserData("rose", "json", "https://www.rosemaryorchard.com/") +// let type = feedType(d) +// XCTAssertTrue(type == .jsonFeed) +// } +// +// // MARK: Unknown +// +// func testPartialAllThisUnknownFeedType() { +// +// // In the case of this feed, the partial data isn’t enough to detect that it’s a JSON Feed. +// // The type detector should return .unknown rather than .notAFeed. +// +// let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/") +// let type = feedType(d, isPartialData: true) +// XCTAssertEqual(type, .unknown) +// } +// +// // MARK: Performance +// +// func testFeedTypePerformance() { +// +// // 0.000 on my 2012 iMac. +// +// let d = parserData("EMarley", "rss", "https://medium.com/@emarley") +// self.measure { +// let _ = feedType(d) +// } +// } +// +// func testFeedTypePerformance2() { +// +// // 0.000 on my 2012 iMac. +// +// let d = parserData("inessential", "json", "http://inessential.com/") +// self.measure { +// let _ = feedType(d) +// } +// } +// +// func testFeedTypePerformance3() { +// +// // 0.000 on my 2012 iMac. +// +// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") +// self.measure { +// let _ = feedType(d) +// } +// } +// +// func testFeedTypePerformance4() { +// +// // 0.001 on my 2012 iMac. 
+// +// let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") +// self.measure { +// let _ = feedType(d) +// } +// } +// +//} func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { let filename = "Resources/\(filename)" diff --git a/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift index 157445587..cd39f7847 100644 --- a/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift @@ -7,37 +7,36 @@ // import XCTest -import Parser -import ParserObjC +import FeedParser -class HTMLLinkTests: XCTestCase { - - func testSixColorsPerformance() { - - // 0.003 sec on my 2012 iMac - let d = parserData("sixcolors", "html", "http://sixcolors.com/") - self.measure { - let _ = RSHTMLLinkParser.htmlLinks(with: d) - } - } - - func testSixColorsLink() { - - let d = parserData("sixcolors", "html", "http://sixcolors.com/") - let links = RSHTMLLinkParser.htmlLinks(with: d) - - let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php" - let textToFind = "this week’s episode of The Incomparable" - - var found = false - for oneLink in links { - if let urlString = oneLink.urlString, let text = oneLink.text, urlString == linkToFind, text == textToFind { - found = true - } - } - - XCTAssertTrue(found) - XCTAssertEqual(links.count, 131) - } - -} +//class HTMLLinkTests: XCTestCase { +// +// func testSixColorsPerformance() { +// +// // 0.003 sec on my 2012 iMac +// let d = parserData("sixcolors", "html", "http://sixcolors.com/") +// self.measure { +// let _ = RSHTMLLinkParser.htmlLinks(with: d) +// } +// } +// +// func testSixColorsLink() { +// +// let d = parserData("sixcolors", "html", "http://sixcolors.com/") +// let links = RSHTMLLinkParser.htmlLinks(with: d) +// +// let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php" +// let textToFind = "this week’s episode 
of The Incomparable" +// +// var found = false +// for oneLink in links { +// if let urlString = oneLink.urlString, let text = oneLink.text, urlString == linkToFind, text == textToFind { +// found = true +// } +// } +// +// XCTAssertTrue(found) +// XCTAssertEqual(links.count, 131) +// } +// +//} diff --git a/Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift index fed009ee6..bf30d68c9 100644 --- a/Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift @@ -7,148 +7,147 @@ // import XCTest -import Parser -import ParserObjC +import FeedParser -class HTMLMetadataTests: XCTestCase { - - func testDaringFireball() { - - let d = parserData("DaringFireball", "html", "http://daringfireball.net/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - - XCTAssertEqual(metadata.favicons.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005") - - XCTAssertEqual(metadata.feedLinks.count, 1) - - let feedLink = metadata.feedLinks.first! - XCTAssertNil(feedLink.title) - XCTAssertEqual(feedLink.type, "application/atom+xml") - XCTAssertEqual(feedLink.urlString, "http://daringfireball.net/feeds/main") - } - - func testDaringFireballPerformance() { - - // 0.002 sec on my 2012 iMac - let d = parserData("DaringFireball", "html", "http://daringfireball.net/") - self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) - } - } - - func testFurbo() { - - let d = parserData("furbo", "html", "http://furbo.org/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - - XCTAssertEqual(metadata.favicons.first?.urlString, "http://furbo.org/favicon.ico") - - XCTAssertEqual(metadata.feedLinks.count, 1) - - let feedLink = metadata.feedLinks.first! 
- XCTAssertEqual(feedLink.title, "Iconfactory News Feed") - XCTAssertEqual(feedLink.type, "application/rss+xml") - } - - func testFurboPerformance() { - - // 0.001 sec on my 2012 iMac - let d = parserData("furbo", "html", "http://furbo.org/") - self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) - } - } - - func testInessential() { - - let d = parserData("inessential", "html", "http://inessential.com/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - - XCTAssertNil(metadata.favicons.first?.urlString) - - XCTAssertEqual(metadata.feedLinks.count, 1) - let feedLink = metadata.feedLinks.first! - XCTAssertEqual(feedLink.title, "RSS") - XCTAssertEqual(feedLink.type, "application/rss+xml") - XCTAssertEqual(feedLink.urlString, "http://inessential.com/xml/rss.xml") - - XCTAssertEqual(metadata.appleTouchIcons.count, 0); - } - - func testInessentialPerformance() { - - // 0.001 sec on my 2012 iMac - let d = parserData("inessential", "html", "http://inessential.com/") - self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) - } - } - - func testCocoPerformance() { - - // 0.004 sec on my 2012 iMac - let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") - self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) - } - } - - func testSixColors() { - - let d = parserData("sixcolors", "html", "http://sixcolors.com/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - - XCTAssertEqual(metadata.favicons.first?.urlString, "https://sixcolors.com/images/favicon.ico") - - XCTAssertEqual(metadata.feedLinks.count, 1); - let feedLink = metadata.feedLinks.first! 
- XCTAssertEqual(feedLink.title, "RSS"); - XCTAssertEqual(feedLink.type, "application/rss+xml"); - XCTAssertEqual(feedLink.urlString, "http://feedpress.me/sixcolors"); - - XCTAssertEqual(metadata.appleTouchIcons.count, 6); - let icon = metadata.appleTouchIcons[3]; - XCTAssertEqual(icon.rel, "apple-touch-icon"); - XCTAssertEqual(icon.sizes, "120x120"); - XCTAssertEqual(icon.urlString, "https://sixcolors.com/apple-touch-icon-120.png"); - } - - func testSixColorsPerformance() { - - // 0.002 sec on my 2012 iMac - let d = parserData("sixcolors", "html", "http://sixcolors.com/") - self.measure { - let _ = RSHTMLMetadataParser.htmlMetadata(with: d) - } - } - - func testCocoOGImage() { - - let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - let openGraphData = metadata.openGraphProperties - let image = openGraphData.images.first! - XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") - } - - func testCocoTwitterImage() { - - let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - let twitterData = metadata.twitterProperties - let imageURL = twitterData.imageURL! - XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") - } - - func testYouTube() { - // YouTube is a special case — the feed links appear after the head section, in the body section. 
- let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks") - let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) - - XCTAssertEqual(metadata.feedLinks.count, 1); - let feedLink = metadata.feedLinks.first! - XCTAssertEqual(feedLink.title, "RSS"); - XCTAssertEqual(feedLink.type, "application/rss+xml"); - XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ"); - } -} +//class HTMLMetadataTests: XCTestCase { +// +// func testDaringFireball() { +// +// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") +// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) +// +// XCTAssertEqual(metadata.favicons.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005") +// +// XCTAssertEqual(metadata.feedLinks.count, 1) +// +// let feedLink = metadata.feedLinks.first! +// XCTAssertNil(feedLink.title) +// XCTAssertEqual(feedLink.type, "application/atom+xml") +// XCTAssertEqual(feedLink.urlString, "http://daringfireball.net/feeds/main") +// } +// +// func testDaringFireballPerformance() { +// +// // 0.002 sec on my 2012 iMac +// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") +// self.measure { +// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) +// } +// } +// +// func testFurbo() { +// +// let d = parserData("furbo", "html", "http://furbo.org/") +// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) +// +// XCTAssertEqual(metadata.favicons.first?.urlString, "http://furbo.org/favicon.ico") +// +// XCTAssertEqual(metadata.feedLinks.count, 1) +// +// let feedLink = metadata.feedLinks.first! 
+// XCTAssertEqual(feedLink.title, "Iconfactory News Feed") +// XCTAssertEqual(feedLink.type, "application/rss+xml") +// } +// +// func testFurboPerformance() { +// +// // 0.001 sec on my 2012 iMac +// let d = parserData("furbo", "html", "http://furbo.org/") +// self.measure { +// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) +// } +// } +// +// func testInessential() { +// +// let d = parserData("inessential", "html", "http://inessential.com/") +// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) +// +// XCTAssertNil(metadata.favicons.first?.urlString) +// +// XCTAssertEqual(metadata.feedLinks.count, 1) +// let feedLink = metadata.feedLinks.first! +// XCTAssertEqual(feedLink.title, "RSS") +// XCTAssertEqual(feedLink.type, "application/rss+xml") +// XCTAssertEqual(feedLink.urlString, "http://inessential.com/xml/rss.xml") +// +// XCTAssertEqual(metadata.appleTouchIcons.count, 0); +// } +// +// func testInessentialPerformance() { +// +// // 0.001 sec on my 2012 iMac +// let d = parserData("inessential", "html", "http://inessential.com/") +// self.measure { +// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) +// } +// } +// +// func testCocoPerformance() { +// +// // 0.004 sec on my 2012 iMac +// let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") +// self.measure { +// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) +// } +// } +// +// func testSixColors() { +// +// let d = parserData("sixcolors", "html", "http://sixcolors.com/") +// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) +// +// XCTAssertEqual(metadata.favicons.first?.urlString, "https://sixcolors.com/images/favicon.ico") +// +// XCTAssertEqual(metadata.feedLinks.count, 1); +// let feedLink = metadata.feedLinks.first! 
+// XCTAssertEqual(feedLink.title, "RSS"); +// XCTAssertEqual(feedLink.type, "application/rss+xml"); +// XCTAssertEqual(feedLink.urlString, "http://feedpress.me/sixcolors"); +// +// XCTAssertEqual(metadata.appleTouchIcons.count, 6); +// let icon = metadata.appleTouchIcons[3]; +// XCTAssertEqual(icon.rel, "apple-touch-icon"); +// XCTAssertEqual(icon.sizes, "120x120"); +// XCTAssertEqual(icon.urlString, "https://sixcolors.com/apple-touch-icon-120.png"); +// } +// +// func testSixColorsPerformance() { +// +// // 0.002 sec on my 2012 iMac +// let d = parserData("sixcolors", "html", "http://sixcolors.com/") +// self.measure { +// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) +// } +// } +// +// func testCocoOGImage() { +// +// let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") +// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) +// let openGraphData = metadata.openGraphProperties +// let image = openGraphData.images.first! +// XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") +// } +// +// func testCocoTwitterImage() { +// +// let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") +// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) +// let twitterData = metadata.twitterProperties +// let imageURL = twitterData.imageURL! +// XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") +// } +// +// func testYouTube() { +// // YouTube is a special case — the feed links appear after the head section, in the body section. 
+// let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks") +// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) +// +// XCTAssertEqual(metadata.feedLinks.count, 1); +// let feedLink = metadata.feedLinks.first! +// XCTAssertEqual(feedLink.title, "RSS"); +// XCTAssertEqual(feedLink.type, "application/rss+xml"); +// XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ"); +// } +//} diff --git a/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift b/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift index fc3af8e5e..688857de5 100644 --- a/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift @@ -7,118 +7,118 @@ // import XCTest -import Parser +import FeedParser -class JSONFeedParserTests: XCTestCase { - - func testInessentialPerformance() { - - // 0.001 sec on my 2012 iMac. - let d = parserData("inessential", "json", "http://inessential.com/") - self.measure { - let _ = try! FeedParser.parseSync(d) - } - } - - func testDaringFireballPerformance() { - - // 0.009 sec on my 2012 iMac. - let d = parserData("DaringFireball", "json", "http://daringfireball.net/") - self.measure { - let _ = try! FeedParser.parseSync(d) - } - } - - func testGettingFaviconAndIconURLs() async { - - let d = parserData("DaringFireball", "json", "http://daringfireball.net/") - let parsedFeed = try! await FeedParser.parse(d)! - - XCTAssert(parsedFeed.faviconURL == "https://daringfireball.net/graphics/favicon-64.png") - XCTAssert(parsedFeed.iconURL == "https://daringfireball.net/graphics/apple-touch-icon.png") - } - - func testAllThis() async { - - let d = parserData("allthis", "json", "http://leancrew.com/allthis/") - let parsedFeed = try! await FeedParser.parse(d)! 
- - XCTAssertEqual(parsedFeed.items.count, 12) - } - - func testCurt() async { - - let d = parserData("curt", "json", "http://curtclifton.net/") - let parsedFeed = try! await FeedParser.parse(d)! - - XCTAssertEqual(parsedFeed.items.count, 26) - - var didFindTwitterQuitterArticle = false - for article in parsedFeed.items { - if article.title == "Twitter Quitter" { - didFindTwitterQuitterArticle = true - XCTAssertTrue(article.contentHTML!.hasPrefix("

I’ve decided to close my Twitter account. William Van Hecke makes a convincing case")) - } - } - - XCTAssertTrue(didFindTwitterQuitterArticle) - } - - func testPixelEnvy() async { - - let d = parserData("pxlnv", "json", "http://pxlnv.com/") - let parsedFeed = try! await FeedParser.parse(d)! - XCTAssertEqual(parsedFeed.items.count, 20) - - } - - func testRose() async { - let d = parserData("rose", "json", "http://www.rosemaryorchard.com/") - let parsedFeed = try! await FeedParser.parse(d)! - XCTAssertEqual(parsedFeed.items.count, 84) - } - - func test3960() async { - let d = parserData("3960", "json", "http://journal.3960.org/") - let parsedFeed = try! await FeedParser.parse(d)! - XCTAssertEqual(parsedFeed.items.count, 20) - XCTAssertEqual(parsedFeed.language, "de-DE") - - for item in parsedFeed.items { - XCTAssertEqual(item.language, "de-DE") - } - } - - func testAuthors() async { - let d = parserData("authors", "json", "https://example.com/") - let parsedFeed = try! await FeedParser.parse(d)! - XCTAssertEqual(parsedFeed.items.count, 4) - - let rootAuthors = Set([ - ParsedAuthor(name: "Root Author 1", url: nil, avatarURL: nil, emailAddress: nil), - ParsedAuthor(name: "Root Author 2", url: nil, avatarURL: nil, emailAddress: nil) - ]) - let itemAuthors = Set([ - ParsedAuthor(name: "Item Author 1", url: nil, avatarURL: nil, emailAddress: nil), - ParsedAuthor(name: "Item Author 2", url: nil, avatarURL: nil, emailAddress: nil) - ]) - let legacyItemAuthors = Set([ - ParsedAuthor(name: "Legacy Item Author", url: nil, avatarURL: nil, emailAddress: nil) - ]) - - XCTAssertEqual(parsedFeed.authors?.count, 2) - XCTAssertEqual(parsedFeed.authors, rootAuthors) - - let noAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item without authors" }! - XCTAssertEqual(noAuthorsItem.authors, nil) - - let legacyAuthorItem = parsedFeed.items.first { $0.uniqueID == "Item with legacy author" }! 
- XCTAssertEqual(legacyAuthorItem.authors, legacyItemAuthors) - - let modernAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with modern authors" }! - XCTAssertEqual(modernAuthorsItem.authors, itemAuthors) - - let bothAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with both" }! - XCTAssertEqual(bothAuthorsItem.authors, itemAuthors) - } -} +//class JSONFeedParserTests: XCTestCase { +// +// func testInessentialPerformance() { +// +// // 0.001 sec on my 2012 iMac. +// let d = parserData("inessential", "json", "http://inessential.com/") +// self.measure { +// let _ = try! FeedParser.parseSync(d) +// } +// } +// +// func testDaringFireballPerformance() { +// +// // 0.009 sec on my 2012 iMac. +// let d = parserData("DaringFireball", "json", "http://daringfireball.net/") +// self.measure { +// let _ = try! FeedParser.parseSync(d) +// } +// } +// +// func testGettingFaviconAndIconURLs() async { +// +// let d = parserData("DaringFireball", "json", "http://daringfireball.net/") +// let parsedFeed = try! await FeedParser.parse(d)! +// +// XCTAssert(parsedFeed.faviconURL == "https://daringfireball.net/graphics/favicon-64.png") +// XCTAssert(parsedFeed.iconURL == "https://daringfireball.net/graphics/apple-touch-icon.png") +// } +// +// func testAllThis() async { +// +// let d = parserData("allthis", "json", "http://leancrew.com/allthis/") +// let parsedFeed = try! await FeedParser.parse(d)! +// +// XCTAssertEqual(parsedFeed.items.count, 12) +// } +// +// func testCurt() async { +// +// let d = parserData("curt", "json", "http://curtclifton.net/") +// let parsedFeed = try! await FeedParser.parse(d)! +// +// XCTAssertEqual(parsedFeed.items.count, 26) +// +// var didFindTwitterQuitterArticle = false +// for article in parsedFeed.items { +// if article.title == "Twitter Quitter" { +// didFindTwitterQuitterArticle = true +// XCTAssertTrue(article.contentHTML!.hasPrefix("

I’ve decided to close my Twitter account. William Van Hecke makes a convincing case")) +// } +// } +// +// XCTAssertTrue(didFindTwitterQuitterArticle) +// } +// +// func testPixelEnvy() async { +// +// let d = parserData("pxlnv", "json", "http://pxlnv.com/") +// let parsedFeed = try! await FeedParser.parse(d)! +// XCTAssertEqual(parsedFeed.items.count, 20) +// +// } +// +// func testRose() async { +// let d = parserData("rose", "json", "http://www.rosemaryorchard.com/") +// let parsedFeed = try! await FeedParser.parse(d)! +// XCTAssertEqual(parsedFeed.items.count, 84) +// } +// +// func test3960() async { +// let d = parserData("3960", "json", "http://journal.3960.org/") +// let parsedFeed = try! await FeedParser.parse(d)! +// XCTAssertEqual(parsedFeed.items.count, 20) +// XCTAssertEqual(parsedFeed.language, "de-DE") +// +// for item in parsedFeed.items { +// XCTAssertEqual(item.language, "de-DE") +// } +// } +// +// func testAuthors() async { +// let d = parserData("authors", "json", "https://example.com/") +// let parsedFeed = try! await FeedParser.parse(d)! +// XCTAssertEqual(parsedFeed.items.count, 4) +// +// let rootAuthors = Set([ +// ParsedAuthor(name: "Root Author 1", url: nil, avatarURL: nil, emailAddress: nil), +// ParsedAuthor(name: "Root Author 2", url: nil, avatarURL: nil, emailAddress: nil) +// ]) +// let itemAuthors = Set([ +// ParsedAuthor(name: "Item Author 1", url: nil, avatarURL: nil, emailAddress: nil), +// ParsedAuthor(name: "Item Author 2", url: nil, avatarURL: nil, emailAddress: nil) +// ]) +// let legacyItemAuthors = Set([ +// ParsedAuthor(name: "Legacy Item Author", url: nil, avatarURL: nil, emailAddress: nil) +// ]) +// +// XCTAssertEqual(parsedFeed.authors?.count, 2) +// XCTAssertEqual(parsedFeed.authors, rootAuthors) +// +// let noAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item without authors" }! 
+// XCTAssertEqual(noAuthorsItem.authors, nil) +// +// let legacyAuthorItem = parsedFeed.items.first { $0.uniqueID == "Item with legacy author" }! +// XCTAssertEqual(legacyAuthorItem.authors, legacyItemAuthors) +// +// let modernAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with modern authors" }! +// XCTAssertEqual(modernAuthorsItem.authors, itemAuthors) +// +// let bothAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with both" }! +// XCTAssertEqual(bothAuthorsItem.authors, itemAuthors) +// } +//} diff --git a/Modules/Parser/Tests/FeedParserTests/ParserTests.swift b/Modules/Parser/Tests/FeedParserTests/ParserTests.swift index e00240408..814773283 100644 --- a/Modules/Parser/Tests/FeedParserTests/ParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/ParserTests.swift @@ -1,5 +1,5 @@ import XCTest -@testable import Parser +import FeedParser final class ParserTests: XCTestCase { func testExample() throws { diff --git a/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift index cbfce4d13..df6bef373 100644 --- a/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift @@ -7,22 +7,22 @@ // import XCTest -import Parser +import FeedParser -class RSSInJSONParserTests: XCTestCase { - - func testScriptingNewsPerformance() { - - // 0.003 sec on my 2012 iMac. - let d = parserData("ScriptingNews", "json", "http://scripting.com/") - self.measure { - let _ = try! FeedParser.parseSync(d) - } - } - - func testFeedLanguage() { - let d = parserData("ScriptingNews", "json", "http://scripting.com/") - let parsedFeed = try! FeedParser.parseSync(d)! - XCTAssertEqual(parsedFeed.language, "en-us") - } -} +//class RSSInJSONParserTests: XCTestCase { +// +// func testScriptingNewsPerformance() { +// +// // 0.003 sec on my 2012 iMac. 
+// let d = parserData("ScriptingNews", "json", "http://scripting.com/") +// self.measure { +// let _ = try! FeedParser.parseSync(d) +// } +// } +// +// func testFeedLanguage() { +// let d = parserData("ScriptingNews", "json", "http://scripting.com/") +// let parsedFeed = try! FeedParser.parseSync(d)! +// XCTAssertEqual(parsedFeed.language, "en-us") +// } +//} diff --git a/Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift b/Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift index c60229ca3..4dea40bee 100644 --- a/Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift @@ -7,7 +7,7 @@ // import XCTest -import Parser +import FeedParser class RSSParserTests: XCTestCase { @@ -17,7 +17,7 @@ class RSSParserTests: XCTestCase { // 0.002 2022 Mac Studio let d = parserData("scriptingNews", "rss", "http://scripting.com/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } @@ -27,7 +27,7 @@ class RSSParserTests: XCTestCase { // 0.001 2022 Mac Studio let d = parserData("KatieFloyd", "rss", "http://katiefloyd.com/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } @@ -37,7 +37,7 @@ class RSSParserTests: XCTestCase { // 0.0004 2022 Mac Studio let d = parserData("EMarley", "rss", "https://medium.com/@emarley") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } @@ -47,21 +47,23 @@ class RSSParserTests: XCTestCase { // 0.0006 2022 Mac Studio let d = parserData("manton", "rss", "http://manton.org/") self.measure { - let _ = try! FeedParser.parseSync(d) + let _ = try! FeedParser.parse(d) } } func testNatashaTheRobot() async { let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! 
XCTAssertEqual(parsedFeed.items.count, 10) } func testTheOmniShowAttachments() async { let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNotNil(article.attachments) @@ -78,7 +80,9 @@ class RSSParserTests: XCTestCase { func testTheOmniShowUniqueIDs() async { let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNotNil(article.uniqueID) @@ -91,7 +95,7 @@ class RSSParserTests: XCTestCase { // Macworld’s feed doesn’t have guids, so they should be calculated unique IDs. let d = parserData("macworld", "rss", "https://www.macworld.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! for article in parsedFeed.items { XCTAssertNotNil(article.uniqueID) @@ -104,7 +108,9 @@ class RSSParserTests: XCTestCase { // Macworld uses names instead of email addresses (despite the RSS spec saying they should be email addresses). let d = parserData("macworld", "rss", "https://www.macworld.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { @@ -123,7 +129,9 @@ class RSSParserTests: XCTestCase { // detect this situation, and every article in the feed should have a permalink. let d = parserData("monkeydom", "rss", "https://coding.monkeydom.de/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! 
+ + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNil(article.url) @@ -136,7 +144,9 @@ class RSSParserTests: XCTestCase { // https://github.com/brentsimmons/NetNewsWire/issues/529 let d = parserData("atp", "rss", "http://atp.fm/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) for article in parsedFeed.items { XCTAssertNotNil(article.contentHTML) @@ -145,7 +155,10 @@ class RSSParserTests: XCTestCase { func testFeedKnownToHaveGuidsThatArentPermalinks() async { let d = parserData("livemint", "xml", "https://www.livemint.com/rss/news") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) + for article in parsedFeed.items { XCTAssertNil(article.url) } @@ -155,7 +168,10 @@ class RSSParserTests: XCTestCase { // This feed uses atom authors, and we don’t want author/title to be used as item/title. // https://github.com/brentsimmons/NetNewsWire/issues/943 let d = parserData("cloudblog", "rss", "https://cloudblog.withgoogle.com/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.items.count > 0) + for article in parsedFeed.items { XCTAssertNotEqual(article.title, "Product Manager, Office of the CTO") XCTAssertNotEqual(article.title, "Developer Programs Engineer") @@ -167,26 +183,29 @@ class RSSParserTests: XCTestCase { // This invalid feed has elements inside s. // 17 Jan 2021 bug report — we’re not parsing titles in this feed. let d = parserData("aktuality", "rss", "https://www.aktuality.sk/") - let parsedFeed = try! await FeedParser.parse(d)! - for article in parsedFeed.items { + let parsedFeed = try! FeedParser.parse(d)! 
+ + XCTAssertTrue(parsedFeed.items.count > 0) + + for article in parsedFeed.items { XCTAssertNotNil(article.title) } } func testFeedLanguage() async { let d = parserData("manton", "rss", "http://manton.org/") - let parsedFeed = try! await FeedParser.parse(d)! + let parsedFeed = try! FeedParser.parse(d)! XCTAssertEqual(parsedFeed.language, "en-US") } -// func testFeedWithGB2312Encoding() { -// // This feed has an encoding we don’t run into very often. -// // https://github.com/Ranchero-Software/NetNewsWire/issues/1477 -// let d = parserData("kc0011", "rss", "http://kc0011.net/") -// let parsedFeed = try! FeedParser.parse(d)! -// XCTAssert(parsedFeed.items.count > 0) -// for article in parsedFeed.items { -// XCTAssertNotNil(article.contentHTML) -// } -// } + func testFeedWithGB2312Encoding() { + // This feed has an encoding we don’t run into very often. + // https://github.com/Ranchero-Software/NetNewsWire/issues/1477 + let d = parserData("kc0011", "rss", "http://kc0011.net/") + let parsedFeed = try! FeedParser.parse(d)! + XCTAssert(parsedFeed.items.count > 0) + for article in parsedFeed.items { + XCTAssertNotNil(article.contentHTML) + } + } } From 6d798ee1671293e7ea65c541888fb0a526d15e19 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 9 Sep 2024 21:57:54 -0700 Subject: [PATCH 35/88] Convert some public structs to immutable final classes. 
--- .../FeedParser/Feeds/ParsedAttachment.swift | 8 +++++++- .../FeedParser/Feeds/ParsedAuthor.swift | 11 ++++++++-- .../Sources/FeedParser/Feeds/ParsedHub.swift | 20 ++++++++++++++++++- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift index 48b2bd64c..a3c3cb4f5 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedAttachment: Hashable, Sendable { +public final class ParsedAttachment: Hashable, Sendable { public let url: String public let mimeType: String? @@ -33,4 +33,10 @@ public struct ParsedAttachment: Hashable, Sendable { public func hash(into hasher: inout Hasher) { hasher.combine(url) } + + // MARK: - Equatable + + public static func ==(lhs: ParsedAttachment, rhs: ParsedAttachment) -> Bool { + lhs.url == rhs.url && lhs.mimeType == rhs.mimeType && lhs.title == rhs.title && lhs.sizeInBytes == rhs.sizeInBytes && lhs.durationInSeconds == rhs.durationInSeconds + } } diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift index 01e91e2ad..260eece16 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift @@ -8,7 +8,7 @@ import Foundation -public struct ParsedAuthor: Hashable, Codable, Sendable { +public final class ParsedAuthor: Hashable, Codable, Sendable { public let name: String? public let url: String? @@ -23,7 +23,7 @@ public struct ParsedAuthor: Hashable, Codable, Sendable { } /// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.) 
- init(singleString: String) { + convenience init(singleString: String) { if singleString.contains("@") { self.init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString) @@ -53,4 +53,11 @@ public struct ParsedAuthor: Hashable, Codable, Sendable { hasher.combine("") } } + + // MARK: - Equatable + + public static func ==(lhs: ParsedAuthor, rhs: ParsedAuthor) -> Bool { + + lhs.name == rhs.name && lhs.url == rhs.url && lhs.avatarURL == rhs.avatarURL && lhs.emailAddress == rhs.emailAddress + } } diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift b/Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift index a1e95e7e2..abd687467 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift @@ -8,8 +8,26 @@ import Foundation -public struct ParsedHub: Hashable, Sendable { +public final class ParsedHub: Hashable, Sendable { public let type: String public let url: String + + init(type: String, url: String) { + self.type = type + self.url = url + } + + // MARK: - Hashable + + public func hash(into hasher: inout Hasher) { + hasher.combine(type) + hasher.combine(url) + } + + // MARK: - Equatable + + public static func ==(lhs: ParsedHub, rhs: ParsedHub) -> Bool { + lhs.type == rhs.type && lhs.url == rhs.url + } } From 860ecfd58c084995421324b11b42d3b6ce24b17b Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Wed, 11 Sep 2024 21:53:58 -0700 Subject: [PATCH 36/88] Start porting FeedType to Swift. 
--- .../Sources/FeedParser/Feeds/FeedType.swift | 110 +++++++---- .../FeedParserTests/FeedParserTypeTests.swift | 175 +++++++++--------- 2 files changed, 156 insertions(+), 129 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift index f934c8920..4e4bf9960 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift @@ -10,53 +10,81 @@ import Foundation import SAX public enum FeedType: Sendable { + case rss case atom case jsonFeed case rssInJSON case unknown case notAFeed + + private static let minNumberOfBytesRequired = 128 + + static func feedType(_ data: Data, isPartialData: Bool = false) -> FeedType { + + // Can call with partial data — while still downloading, for instance. + // If there’s not enough data, return .unknown. Ask again when there’s more data. + // If it’s definitely not a feed, return .notAFeed. + + if data.count < minNumberOfBytesRequired { + return .unknown + } + + let count = data.count + + return data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in + + guard let baseAddress = pointer.baseAddress else { + return .unknown + } + let cCharPointer = baseAddress.assumingMemoryBound(to: CChar.self) + + if isProbablyRSS(cCharPointer, count) { + return .rss + } + + return .unknown + } +// if d.isProbablyJSONFeed() { +// return .jsonFeed +// } +// if d.isProbablyRSSInJSON() { +// return .rssInJSON +// } +// if d.isProbablyAtom() { +// return .atom +// } +// +// if isPartialData && d.isProbablyJSON() { +// // Might not be able to detect a JSON Feed without all data. +// // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) +// // has, at this writing, the JSON version element at the end of the feed, +// // which is totally legal — but it means not being able to detect +// // that it’s a JSON Feed without all the data. +// // So this returns .unknown instead of .notAFeed. 
+// return .unknown +// } + +// return .notAFeed + +// return type + } } +private extension FeedType { -//private let minNumberOfBytesRequired = 128 -// -//public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType { -// -// // Can call with partial data — while still downloading, for instance. -// // If there’s not enough data, return .unknown. Ask again when there’s more data. -// // If it’s definitely not a feed, return .notAFeed. -// // -// // This is fast enough to call on the main thread. -// -// if parserData.data.count < minNumberOfBytesRequired { -// return .unknown -// } -// -// let nsdata = parserData.data as NSData -// -// if nsdata.isProbablyJSONFeed() { -// return .jsonFeed -// } -// if nsdata.isProbablyRSSInJSON() { -// return .rssInJSON -// } -// if nsdata.isProbablyRSS() { -// return .rss -// } -// if nsdata.isProbablyAtom() { -// return .atom -// } -// -// if isPartialData && nsdata.isProbablyJSON() { -// // Might not be able to detect a JSON Feed without all data. -// // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) -// // has, at this writing, the JSON version element at the end of the feed, -// // which is totally legal — but it means not being able to detect -// // that it’s a JSON Feed without all the data. -// // So this returns .unknown instead of .notAFeed. 
-// return .unknown -// } -// -// return .notAFeed -//} + static func isProbablyRSS(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + if didFindString("", bytes, count) && didFindString("", bytes, count) + } + + static func didFindString(_ string: UnsafePointer, _ bytes: UnsafePointer, _ numberOfBytes: Int) -> Bool { + + let foundString = strnstr(bytes, string, numberOfBytes) + return foundString != nil + } +} diff --git a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift index 5028ddef5..dc28935ab 100644 --- a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift @@ -7,13 +7,13 @@ // import XCTest -import FeedParser +@testable import FeedParser import SAX -//class FeedParserTypeTests: XCTestCase { -// -// // MARK: HTML -// +class FeedParserTypeTests: XCTestCase { + + // MARK: HTML + // func testDaringFireballHTMLType() { // // let d = parserData("DaringFireball", "html", "http://daringfireball.net/") @@ -41,79 +41,79 @@ import SAX // let type = feedType(d) // XCTAssertTrue(type == .notAFeed) // } -// -// // MARK: RSS -// -// func testEMarleyRSSType() { -// -// let d = parserData("EMarley", "rss", "https://medium.com/@emarley") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testScriptingNewsRSSType() { -// -// let d = parserData("scriptingNews", "rss", "http://scripting.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testKatieFloydRSSType() { -// -// let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testMantonRSSType() { -// -// let d = parserData("manton", "rss", "http://manton.org/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testDCRainmakerRSSType() { -// -// let d = parserData("dcrainmaker", "xml", 
"https://www.dcrainmaker.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testMacworldRSSType() { -// -// let d = parserData("macworld", "rss", "https://www.macworld.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testNatashaTheRobotRSSType() { -// -// let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testDontHitSaveRSSWithBOMType() { -// -// let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testBioRDF() { -// let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// func testPHPXML() { -// let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") -// let type = feedType(d) -// XCTAssertTrue(type == .rss) -// } -// -// // MARK: Atom -// + + // MARK: RSS + + func testEMarleyRSSType() { + + let d = parserData("EMarley", "rss", "https://medium.com/@emarley") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testScriptingNewsRSSType() { + + let d = parserData("scriptingNews", "rss", "http://scripting.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testKatieFloydRSSType() { + + let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testMantonRSSType() { + + let d = parserData("manton", "rss", "http://manton.org/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testDCRainmakerRSSType() { + + let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testMacworldRSSType() { + + let d = parserData("macworld", 
"rss", "https://www.macworld.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testNatashaTheRobotRSSType() { + + let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testDontHitSaveRSSWithBOMType() { + + let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testBioRDF() { + let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + func testPHPXML() { + let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rss) + } + + // MARK: Atom + // func testDaringFireballAtomType() { // // // File extension is .rss, but it’s really an Atom feed. @@ -180,9 +180,9 @@ import SAX // let type = feedType(d) // XCTAssertTrue(type == .jsonFeed) // } -// -// // MARK: Unknown -// + + // MARK: Unknown + // func testPartialAllThisUnknownFeedType() { // // // In the case of this feed, the partial data isn’t enough to detect that it’s a JSON Feed. @@ -192,9 +192,9 @@ import SAX // let type = feedType(d, isPartialData: true) // XCTAssertEqual(type, .unknown) // } -// -// // MARK: Performance -// + + // MARK: Performance + // func testFeedTypePerformance() { // // // 0.000 on my 2012 iMac. @@ -204,7 +204,7 @@ import SAX // let _ = feedType(d) // } // } -// + // func testFeedTypePerformance2() { // // // 0.000 on my 2012 iMac. @@ -214,7 +214,7 @@ import SAX // let _ = feedType(d) // } // } -// + // func testFeedTypePerformance3() { // // // 0.000 on my 2012 iMac. 
@@ -234,8 +234,7 @@ import SAX // let _ = feedType(d) // } // } -// -//} +} func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { let filename = "Resources/\(filename)" From 1159d45e5fe48bc62d65491af873ce1ec9a13d79 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Wed, 11 Sep 2024 21:56:37 -0700 Subject: [PATCH 37/88] Add Atom feed detection to FeedType. --- .../Sources/FeedParser/Feeds/FeedType.swift | 10 ++++- .../FeedParserTests/FeedParserTypeTests.swift | 42 +++++++++---------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift index 4e4bf9960..aa718a336 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift @@ -10,7 +10,7 @@ import Foundation import SAX public enum FeedType: Sendable { - + case rss case atom case jsonFeed @@ -42,6 +42,9 @@ public enum FeedType: Sendable { if isProbablyRSS(cCharPointer, count) { return .rss } + if isProbablyAtom(cCharPointer, count) { + return .atom + } return .unknown } @@ -82,6 +85,11 @@ private extension FeedType { return didFindString("", bytes, count) && didFindString("", bytes, count) } + static func isProbablyAtom(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + didFindString(", _ bytes: UnsafePointer, _ numberOfBytes: Int) -> Bool { let foundString = strnstr(bytes, string, numberOfBytes) diff --git a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift index dc28935ab..f81e76963 100644 --- a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift @@ -114,27 +114,27 @@ class FeedParserTypeTests: XCTestCase { // MARK: Atom -// func testDaringFireballAtomType() { -// -// // File extension is .rss, but it’s really an Atom feed. 
-// let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") -// let type = feedType(d) -// XCTAssertTrue(type == .atom) -// } -// -// func testOneFootTsunamiAtomType() { -// -// let d = parserData("OneFootTsunami", "atom", "http://onefoottsunami.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .atom) -// } -// -// func testRussCoxAtomType() { -// let d = parserData("russcox", "atom", "https://research.swtch.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .atom) -// } -// + func testDaringFireballAtomType() { + + // File extension is .rss, but it’s really an Atom feed. + let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .atom) + } + + func testOneFootTsunamiAtomType() { + + let d = parserData("OneFootTsunami", "atom", "http://onefoottsunami.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .atom) + } + + func testRussCoxAtomType() { + let d = parserData("russcox", "atom", "https://research.swtch.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .atom) + } + // // MARK: RSS-in-JSON // // func testScriptingNewsJSONType() { From 0ffb878d9c2160849a9d2f478e5fa20915f8b9f3 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Thu, 12 Sep 2024 21:47:59 -0700 Subject: [PATCH 38/88] Add `isProbablyJSON` method to FeedType detector. --- .../Sources/FeedParser/Feeds/FeedType.swift | 82 +++++++++++++------ .../FeedParserTests/FeedParserTypeTests.swift | 18 ++-- 2 files changed, 64 insertions(+), 36 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift index aa718a336..b297edbba 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift @@ -26,12 +26,11 @@ public enum FeedType: Sendable { // If there’s not enough data, return .unknown. 
Ask again when there’s more data. // If it’s definitely not a feed, return .notAFeed. - if data.count < minNumberOfBytesRequired { + let count = data.count + if count < minNumberOfBytesRequired { return .unknown } - let count = data.count - return data.withUnsafeBytes { (pointer: UnsafeRawBufferPointer) in guard let baseAddress = pointer.baseAddress else { @@ -45,32 +44,18 @@ public enum FeedType: Sendable { if isProbablyAtom(cCharPointer, count) { return .atom } + if isPartialData && isProbablyJSON(cCharPointer, count) { + // Might not be able to detect a JSON Feed without all data. + // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) + // has, at this writing, the JSON version element at the end of the feed, + // which is totally legal — but it means not being able to detect + // that it’s a JSON Feed without all the data. + // So this returns .unknown instead of .notAFeed. + return .unknown + } - return .unknown + return .notAFeed } -// if d.isProbablyJSONFeed() { -// return .jsonFeed -// } -// if d.isProbablyRSSInJSON() { -// return .rssInJSON -// } -// if d.isProbablyAtom() { -// return .atom -// } -// -// if isPartialData && d.isProbablyJSON() { -// // Might not be able to detect a JSON Feed without all data. -// // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) -// // has, at this writing, the JSON version element at the end of the feed, -// // which is totally legal — but it means not being able to detect -// // that it’s a JSON Feed without all the data. -// // So this returns .unknown instead of .notAFeed. 
-// return .unknown -// } - -// return .notAFeed - -// return type } } @@ -90,9 +75,52 @@ private extension FeedType { didFindString(", _ count: Int) -> Bool { + + bytesStartWithStringIgnoringWhitespace("{", bytes, count) + } + static func didFindString(_ string: UnsafePointer, _ bytes: UnsafePointer, _ numberOfBytes: Int) -> Bool { let foundString = strnstr(bytes, string, numberOfBytes) return foundString != nil } + + struct Whitespace { + static let space = Character(" ").asciiValue! + static let `return` = Character("\r").asciiValue! + static let newline = Character("\n").asciiValue! + static let tab = Character("\t").asciiValue! + } + + static func bytesStartWithStringIgnoringWhitespace(_ string: UnsafePointer, _ bytes: UnsafePointer, _ numberOfBytes: Int) -> Bool { + + var i = 0 + + while i < numberOfBytes { + + let ch = bytes[i] + + if ch == Whitespace.space || ch == Whitespace.return || ch == Whitespace.newline || ch == Whitespace.tab { + i += 1 + continue + } + + if ch == string[0] { + if let found = strnstr(bytes, string, numberOfBytes) { + return found == bytes + i + } + } + + // Allow for a BOM of up to four bytes (assuming BOM is only at the start) + if i < 4 { + i += 1 + continue + } + + break + } + + return false + } } diff --git a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift index f81e76963..ff7cdf559 100644 --- a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift @@ -183,15 +183,15 @@ class FeedParserTypeTests: XCTestCase { // MARK: Unknown -// func testPartialAllThisUnknownFeedType() { -// -// // In the case of this feed, the partial data isn’t enough to detect that it’s a JSON Feed. -// // The type detector should return .unknown rather than .notAFeed. 
-// -// let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/") -// let type = feedType(d, isPartialData: true) -// XCTAssertEqual(type, .unknown) -// } + func testPartialAllThisUnknownFeedType() { + + // In the case of this feed, the partial data isn’t enough to detect that it’s a JSON Feed. + // The type detector should return .unknown rather than .notAFeed. + + let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/") + let type = FeedType.feedType(d.data, isPartialData: true) + XCTAssertEqual(type, .unknown) + } // MARK: Performance From 784b915e7454d252417adf6df0f3c53ab1894889 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Thu, 12 Sep 2024 22:07:16 -0700 Subject: [PATCH 39/88] Finish porting FeedType support code. All tests pass. --- ...9A7464E0-C633-49A0-871F-1F5206C35DE8.plist | 52 +++++ .../FeedParserTests.xcbaseline/Info.plist | 33 +++ .../Sources/FeedParser/Feeds/FeedType.swift | 41 +++- .../FeedParserTests/FeedParserTypeTests.swift | 218 +++++++++--------- 4 files changed, 226 insertions(+), 118 deletions(-) create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/9A7464E0-C633-49A0-871F-1F5206C35DE8.plist create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/Info.plist diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/9A7464E0-C633-49A0-871F-1F5206C35DE8.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/9A7464E0-C633-49A0-871F-1F5206C35DE8.plist new file mode 100644 index 000000000..66ab5b65e --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/9A7464E0-C633-49A0-871F-1F5206C35DE8.plist @@ -0,0 +1,52 @@ + + + + + classNames + + FeedParserTypeTests + + testFeedTypePerformance() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000010 + baselineIntegrationDisplayName 
+ Local Baseline + + + testFeedTypePerformance2() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000010 + baselineIntegrationDisplayName + Local Baseline + + + testFeedTypePerformance3() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000499 + baselineIntegrationDisplayName + Local Baseline + + + testFeedTypePerformance4() + + com.apple.XCTPerformanceMetric_WallClockTime + + baselineAverage + 0.000691 + baselineIntegrationDisplayName + Local Baseline + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/Info.plist b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/Info.plist new file mode 100644 index 000000000..fee112bfa --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcbaselines/FeedParserTests.xcbaseline/Info.plist @@ -0,0 +1,33 @@ + + + + + runDestinationsByUUID + + 9A7464E0-C633-49A0-871F-1F5206C35DE8 + + localComputer + + busSpeedInMHz + 0 + cpuCount + 1 + cpuKind + Apple M1 Max + cpuSpeedInMHz + 0 + logicalCPUCoresPerPackage + 10 + modelCode + Mac13,1 + physicalCPUCoresPerPackage + 10 + platformIdentifier + com.apple.platform.macosx + + targetArchitecture + arm64e + + + + diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift index b297edbba..f26cd1ab2 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift @@ -38,21 +38,32 @@ public enum FeedType: Sendable { } let cCharPointer = baseAddress.assumingMemoryBound(to: CChar.self) + if isProbablyJSON(cCharPointer, count) { + + if isPartialData { + // Might not be able to detect a JSON Feed without all data. + // Dr. 
Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) + // has, at this writing, the JSON version element at the end of the feed, + // which is totally legal — but it means not being able to detect + // that it’s a JSON Feed without all the data. + // So this returns .unknown instead of .notAFeed. + return .unknown + } + + if isProbablyJSONFeed(cCharPointer, count) { + return .jsonFeed + } + if isProbablyRSSInJSON(cCharPointer, count) { + return .rssInJSON + } + } + if isProbablyRSS(cCharPointer, count) { return .rss } if isProbablyAtom(cCharPointer, count) { return .atom } - if isPartialData && isProbablyJSON(cCharPointer, count) { - // Might not be able to detect a JSON Feed without all data. - // Dr. Drang’s JSON Feed (see althis.json and allthis-partial.json in tests) - // has, at this writing, the JSON version element at the end of the feed, - // which is totally legal — but it means not being able to detect - // that it’s a JSON Feed without all the data. - // So this returns .unknown instead of .notAFeed. - return .unknown - } return .notAFeed } @@ -80,6 +91,18 @@ private extension FeedType { bytesStartWithStringIgnoringWhitespace("{", bytes, count) } + static func isProbablyJSONFeed(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + // Assumes already called `isProbablyJSON` and it returned true. + didFindString("://jsonfeed.org/version/", bytes, count) || didFindString(":\\/\\/jsonfeed.org\\/version\\/", bytes, count) + } + + static func isProbablyRSSInJSON(_ bytes: UnsafePointer, _ count: Int) -> Bool { + + // Assumes already called `isProbablyJSON` and it returned true. 
+ didFindString("rss", bytes, count) && didFindString("channel", bytes, count) && didFindString("item", bytes, count) + } + static func didFindString(_ string: UnsafePointer, _ bytes: UnsafePointer, _ numberOfBytes: Int) -> Bool { let foundString = strnstr(bytes, string, numberOfBytes) diff --git a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift index ff7cdf559..c17f3ee8d 100644 --- a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift @@ -14,33 +14,33 @@ class FeedParserTypeTests: XCTestCase { // MARK: HTML -// func testDaringFireballHTMLType() { -// -// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") -// let type = feedType(d) -// XCTAssertTrue(type == .notAFeed) -// } -// -// func testFurboHTMLType() { -// -// let d = parserData("furbo", "html", "http://furbo.org/") -// let type = feedType(d) -// XCTAssertTrue(type == .notAFeed) -// } -// -// func testInessentialHTMLType() { -// -// let d = parserData("inessential", "html", "http://inessential.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .notAFeed) -// } -// -// func testSixColorsHTMLType() { -// -// let d = parserData("sixcolors", "html", "https://sixcolors.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .notAFeed) -// } + func testDaringFireballHTMLType() { + + let d = parserData("DaringFireball", "html", "http://daringfireball.net/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .notAFeed) + } + + func testFurboHTMLType() { + + let d = parserData("furbo", "html", "http://furbo.org/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .notAFeed) + } + + func testInessentialHTMLType() { + + let d = parserData("inessential", "html", "http://inessential.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .notAFeed) + } + + func testSixColorsHTMLType() { + + 
let d = parserData("sixcolors", "html", "https://sixcolors.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .notAFeed) + } // MARK: RSS @@ -135,51 +135,51 @@ class FeedParserTypeTests: XCTestCase { XCTAssertTrue(type == .atom) } -// // MARK: RSS-in-JSON -// -// func testScriptingNewsJSONType() { -// -// let d = parserData("ScriptingNews", "json", "http://scripting.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .rssInJSON) -// } -// -// // MARK: JSON Feed -// -// func testInessentialJSONFeedType() { -// -// let d = parserData("inessential", "json", "http://inessential.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .jsonFeed) -// } -// -// func testAllThisJSONFeedType() { -// -// let d = parserData("allthis", "json", "http://leancrew.com/allthis/") -// let type = feedType(d) -// XCTAssertTrue(type == .jsonFeed) -// } -// -// func testCurtJSONFeedType() { -// -// let d = parserData("curt", "json", "http://curtclifton.net/") -// let type = feedType(d) -// XCTAssertTrue(type == .jsonFeed) -// } -// -// func testPixelEnvyJSONFeedType() { -// -// let d = parserData("pxlnv", "json", "http://pxlnv.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .jsonFeed) -// } -// -// func testRoseJSONFeedType() { -// -// let d = parserData("rose", "json", "https://www.rosemaryorchard.com/") -// let type = feedType(d) -// XCTAssertTrue(type == .jsonFeed) -// } + // MARK: RSS-in-JSON + + func testScriptingNewsJSONType() { + + let d = parserData("ScriptingNews", "json", "http://scripting.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .rssInJSON) + } + + // MARK: JSON Feed + + func testInessentialJSONFeedType() { + + let d = parserData("inessential", "json", "http://inessential.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .jsonFeed) + } + + func testAllThisJSONFeedType() { + + let d = parserData("allthis", "json", "http://leancrew.com/allthis/") + let type = FeedType.feedType(d.data) + 
XCTAssertTrue(type == .jsonFeed) + } + + func testCurtJSONFeedType() { + + let d = parserData("curt", "json", "http://curtclifton.net/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .jsonFeed) + } + + func testPixelEnvyJSONFeedType() { + + let d = parserData("pxlnv", "json", "http://pxlnv.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .jsonFeed) + } + + func testRoseJSONFeedType() { + + let d = parserData("rose", "json", "https://www.rosemaryorchard.com/") + let type = FeedType.feedType(d.data) + XCTAssertTrue(type == .jsonFeed) + } // MARK: Unknown @@ -195,45 +195,45 @@ class FeedParserTypeTests: XCTestCase { // MARK: Performance -// func testFeedTypePerformance() { -// -// // 0.000 on my 2012 iMac. -// -// let d = parserData("EMarley", "rss", "https://medium.com/@emarley") -// self.measure { -// let _ = feedType(d) -// } -// } + func testFeedTypePerformance() { -// func testFeedTypePerformance2() { -// -// // 0.000 on my 2012 iMac. -// -// let d = parserData("inessential", "json", "http://inessential.com/") -// self.measure { -// let _ = feedType(d) -// } -// } + // 0.000 on my 2012 iMac. -// func testFeedTypePerformance3() { -// -// // 0.000 on my 2012 iMac. -// -// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") -// self.measure { -// let _ = feedType(d) -// } -// } -// -// func testFeedTypePerformance4() { -// -// // 0.001 on my 2012 iMac. -// -// let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") -// self.measure { -// let _ = feedType(d) -// } -// } + let d = parserData("EMarley", "rss", "https://medium.com/@emarley") + self.measure { + let _ = FeedType.feedType(d.data) + } + } + + func testFeedTypePerformance2() { + + // 0.000 on my 2012 iMac. + + let d = parserData("inessential", "json", "http://inessential.com/") + self.measure { + let _ = FeedType.feedType(d.data) + } + } + + func testFeedTypePerformance3() { + + // 0.000 on my 2012 iMac. 
+ + let d = parserData("DaringFireball", "html", "http://daringfireball.net/") + self.measure { + let _ = FeedType.feedType(d.data) + } + } + + func testFeedTypePerformance4() { + + // 0.001 on my 2012 iMac. + + let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") + self.measure { + let _ = FeedType.feedType(d.data) + } + } } func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { From a85068f2861c505cf47544b3f3eb704c6a1e9137 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 13 Sep 2024 17:35:39 -0700 Subject: [PATCH 40/88] Restore and revise commented-out code in FeedParser. --- .../Sources/FeedParser/Feeds/FeedParser.swift | 90 +++++++------------ 1 file changed, 31 insertions(+), 59 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index 5189a18c9..c5c272433 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -14,67 +14,39 @@ import SAX public struct FeedParser { -// public static func canParse(_ parserData: ParserData) -> Bool { -// -// let type = feedType(parserData) -// -// switch type { -// case .jsonFeed, .rssInJSON, .rss, .atom: -// return true -// default: -// return false -// } -// } + public static func canParse(_ parserData: ParserData) -> Bool { + + let type = FeedType.feedType(parserData.data) + + switch type { + case .jsonFeed, .rssInJSON, .rss, .atom: + return true + default: + return false + } + } public static func parse(_ parserData: ParserData) throws -> ParsedFeed? 
{ - let rssFeed = RSSParser.parsedFeed(with: parserData) - let parsedFeed = RSSFeedTransformer.parsedFeed(with: rssFeed) - - return parsedFeed -// let type = feedType(parserData) -// -// switch type { -// -// case .jsonFeed: -// return try JSONFeedParser.parse(parserData) -// -// case .rssInJSON: -// return try RSSInJSONParser.parse(parserData) -// -// case .rss: -// return RSSParser.parse(parserData) -// -// case .atom: -// return AtomParser.parse(parserData) -// -// case .unknown, .notAFeed: -// return nil -// } + let type = FeedType.feedType(parserData.data) + + switch type { + + case .jsonFeed: + return nil // TODO: try JSONFeedParser.parse(parserData) + + case .rssInJSON: + return nil // TODO: try RSSInJSONParser.parse(parserData) + + case .rss: + let rssFeed = RSSParser.parsedFeed(with: parserData) + return RSSFeedTransformer.parsedFeed(with: rssFeed) + + case .atom: + return nil // TODO: AtomParser.parse(parserData) + + case .unknown, .notAFeed: + return nil + } } - -// /// For unit tests measuring performance. -// public static func parseSync(_ parserData: ParserData) throws -> ParsedFeed? { -// -// let type = feedType(parserData) -// -// switch type { -// -// case .jsonFeed: -// return try JSONFeedParser.parse(parserData) -// -// case .rssInJSON: -// return try RSSInJSONParser.parse(parserData) -// -// case .rss: -// return RSSParser.parse(parserData) -// -// case .atom: -// return AtomParser.parse(parserData) -// -// case .unknown, .notAFeed: -// return nil -// } -// } - } From 88b315554c8948357adf647308e82844ed24b14b Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 13 Sep 2024 17:46:48 -0700 Subject: [PATCH 41/88] Start AtomParser. 
--- .../FeedParser/Feeds/XML/AtomParser.swift | 74 ++++++++++++++----- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index e3b4610e7..5929d03ab 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -8,22 +8,62 @@ import Foundation import SAX +import DateParser -// RSSParser wraps the Objective-C RSAtomParser. -// -// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc. -// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates -// the same things that JSONFeedParser and RSSInJSONParser create. -// -// In general, you should see FeedParser.swift for all your feed-parsing needs. +final class AtomParser { + + private var parserData: ParserData + private var feedURL: String { + parserData.url + } + private var data: Data { + parserData.data + } + + private let feed: RSSFeed + private var articles = [RSSArticle]() + private var currentArticle: RSSArticle? { + articles.last + } + + static func parsedFeed(with parserData: ParserData) -> RSSFeed { + + let parser = AtomParser(parserData) + parser.parse() + return parser.feed + } + + init(_ parserData: ParserData) { + self.parserData = parserData + self.feed = RSSFeed(urlString: parserData.url) + } +} + +private extension AtomParser { + + func parse() { + + let saxParser = SAXParser(delegate: self, data: data) + saxParser.parse() + feed.articles = articles + } + + +} + +extension AtomParser: SAXParserDelegate { + + public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) 
{ + + } + + public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + + } + + public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { + + // Required method. + } +} -//public struct AtomParser { -// -// public static func parse(_ parserData: ParserData) -> ParsedFeed? { -// -// if let rsParsedFeed = RSAtomParser.parseFeed(with: parserData) { -// return RSParsedFeedTransformer.parsedFeed(rsParsedFeed) -// } -// return nil -// } -//} From ea495d1fe3cdbe82f4616078110a10a232be39a0 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 13 Sep 2024 19:35:18 -0700 Subject: [PATCH 42/88] Continue progress on AtomParser. --- .../FeedParser/Feeds/XML/AtomParser.swift | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 5929d03ab..292d883ea 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -21,11 +21,21 @@ final class AtomParser { } private let feed: RSSFeed + private var articles = [RSSArticle]() private var currentArticle: RSSArticle? { articles.last } + private var attributesStack = [SAXParser.XMLAttributesDictionary]() + private var currentAttributes: SAXParser.XMLAttributesDictionary? 
{ + attributesStack.last + } + + private var parsingArticle = false + private var parsingXHTML = false + private var endFeedFound = false + static func parsedFeed(with parserData: ParserData) -> RSSFeed { let parser = AtomParser(parserData) @@ -48,6 +58,11 @@ private extension AtomParser { feed.articles = articles } + func addArticle() { + let article = RSSArticle(feedURL) + articles.append(article) + } + } @@ -55,6 +70,24 @@ extension AtomParser: SAXParserDelegate { public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer?) { + if endFeedFound { + return + } + + let xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) ?? SAXParser.XMLAttributesDictionary() + attributesStack.append(xmlAttributes) + + if parsingXHTML { +// addXHTMLTag(localName) + return + } + +// if SAXEqualTags(localName, "entry") { +// parsingArticle = true +// addArticle() +// return +// } + } public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { From f830008983d13dadb0ee2e1536790d09e5bfe5b5 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 13 Sep 2024 20:18:22 -0700 Subject: [PATCH 43/88] Add `addXHTMLTag` to AtomParser. 
--- .../FeedParser/Feeds/XML/AtomParser.swift | 52 ++++++++++++++++--- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 292d883ea..26aba2a61 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -32,8 +32,10 @@ final class AtomParser { attributesStack.last } - private var parsingArticle = false private var parsingXHTML = false + private var xhtmlString: String? + + private var parsingArticle = false private var endFeedFound = false static func parsedFeed(with parserData: ParserData) -> RSSFeed { @@ -58,12 +60,48 @@ private extension AtomParser { feed.articles = articles } + private struct XMLName { + static let entry = "entry".utf8CString + } + func addArticle() { let article = RSSArticle(feedURL) articles.append(article) } + func addXHTMLTag(_ localName: XMLPointer) { + guard var xhtmlString else { + assertionFailure("xhtmlString must not be nil when in addXHTMLTag.") + return + } + + let name: String? 
= { + let data = Data(bytes: localName, count: strlen(localName)) + return String(data: data, encoding: .utf8) + }() + guard let name else { + assertionFailure("Unexpected failure converting XMLPointer to String in addXHTMLTag.") + return + } + + xhtmlString.append("<") + xhtmlString.append(name) + + if let currentAttributes, currentAttributes.count > 0 { + for (key, value) in currentAttributes { + xhtmlString.append(" ") + xhtmlString.append(key) + xhtmlString.append("=\"") + + let encodedValue = value.replacingOccurrences(of: "\"", with: """) + xhtmlString.append(encodedValue) + xhtmlString.append("\"") + } + } + + xhtmlString.append(">") + } } extension AtomParser: SAXParserDelegate { @@ -78,15 +116,15 @@ extension AtomParser: SAXParserDelegate { attributesStack.append(xmlAttributes) if parsingXHTML { -// addXHTMLTag(localName) + addXHTMLTag(localName) return } -// if SAXEqualTags(localName, "entry") { -// parsingArticle = true -// addArticle() -// return -// } + if SAXEqualTags(localName, XMLName.entry) { + parsingArticle = true + addArticle() + return + } } From 28a68199012b33b1e2a9937e08750d9b083fed10 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Fri, 13 Sep 2024 21:56:27 -0700 Subject: [PATCH 44/88] Continue progress on AtomParser. --- .../FeedParser/Feeds/XML/AtomParser.swift | 56 +++++++++++++++++++ .../FeedParser/Feeds/XML/RSSAuthor.swift | 8 +-- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 26aba2a61..368a7588a 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -35,7 +35,11 @@ final class AtomParser { private var parsingXHTML = false private var xhtmlString: String? + private var currentAuthor: RSSAuthor? 
+ private var parsingAuthor = false + private var parsingArticle = false + private var parsingSource = false private var endFeedFound = false static func parsedFeed(with parserData: ParserData) -> RSSFeed { @@ -62,8 +66,22 @@ private extension AtomParser { private struct XMLName { static let entry = "entry".utf8CString + static let content = "content".utf8CString + static let summary = "summary".utf8CString + static let link = "link".utf8CString + static let feed = "feed".utf8CString + static let source = "source".utf8CString + static let author = "author".utf8CString } + func addFeedLink() { + + } + + func addFeedLanguage() { + + } + func addArticle() { let article = RSSArticle(feedURL) articles.append(article) @@ -126,6 +144,44 @@ extension AtomParser: SAXParserDelegate { return } + if SAXEqualTags(localName, XMLName.author) { + parsingAuthor = true + currentAuthor = RSSAuthor() + return + } + + if SAXEqualTags(localName, XMLName.source) { + parsingSource = true + return + } + + let isContentTag = SAXEqualTags(localName, XMLName.content) + let isSummaryTag = SAXEqualTags(localName, XMLName.summary) + + if parsingArticle && (isContentTag || isSummaryTag) { + + if isContentTag { + currentArticle?.language = xmlAttributes["xml:lang"] + } + + let contentType = xmlAttributes["type"]; + if contentType == "xhtml" { + parsingXHTML = true + xhtmlString = "" + return + } + } + + if !parsingArticle && SAXEqualTags(localName, XMLName.link) { + addFeedLink() + return + } + + if SAXEqualTags(localName, XMLName.feed) { + addFeedLanguage() + } + + saxParser.beginStoringCharacters() } public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) 
{ diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift index a153ecb1e..9a5e70f88 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift @@ -14,7 +14,7 @@ final class RSSAuthor { var avatarURL: String? var emailAddress: String? - init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) { + init(name: String? = nil, url: String? = nil, avatarURL: String? = nil, emailAddress: String? = nil) { self.name = name self.url = url self.avatarURL = avatarURL @@ -25,11 +25,11 @@ final class RSSAuthor { convenience init(singleString: String) { if singleString.contains("@") { - self.init(name: nil, url: nil, avatarURL: nil, emailAddress: singleString) + self.init(emailAddress: singleString) } else if singleString.lowercased().hasPrefix("http") { - self.init(name: nil, url: singleString, avatarURL: nil, emailAddress: nil) + self.init(url: singleString) } else { - self.init(name: singleString, url: nil, avatarURL: nil, emailAddress: nil) + self.init(name: singleString) } } } From b09250f7dc072daa5072d0099705a7ae102c5639 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 10:59:55 -0700 Subject: [PATCH 45/88] Continue progress on AtomParser. 
--- .../FeedParser/Feeds/XML/AtomParser.swift | 23 +++++++++++++------ Modules/Parser/Sources/SAX/SAXUtilities.swift | 8 +++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 368a7588a..edc154c8d 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -81,7 +81,7 @@ private extension AtomParser { func addFeedLanguage() { } - + func addArticle() { let article = RSSArticle(feedURL) articles.append(article) @@ -94,11 +94,7 @@ private extension AtomParser { return } - let name: String? = { - let data = Data(bytes: localName, count: strlen(localName)) - return String(data: data, encoding: .utf8) - }() - guard let name else { + guard let name = String(xmlPointer: localName) else { assertionFailure("Unexpected failure converting XMLPointer to String in addXHTMLTag.") return } @@ -190,7 +186,20 @@ extension AtomParser: SAXParserDelegate { public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { - // Required method. + guard parsingXHTML else { + return + } + guard var s = String(xmlPointer: xmlCharactersFound, count: count) else { + return + } + + // libxml decodes all entities; we need to re-encode certain characters + // (<, >, and &) when inside XHTML text content. 
+ s = s.replacingOccurrences(of: "<", with: "&;lt;") + s = s.replacingOccurrences(of: ">", with: "&;gt;") + s = s.replacingOccurrences(of: "&", with: "&") + + xhtmlString = s } } diff --git a/Modules/Parser/Sources/SAX/SAXUtilities.swift b/Modules/Parser/Sources/SAX/SAXUtilities.swift index 10ba86f0a..3bb680e89 100644 --- a/Modules/Parser/Sources/SAX/SAXUtilities.swift +++ b/Modules/Parser/Sources/SAX/SAXUtilities.swift @@ -31,3 +31,11 @@ public func SAXEqualTags(_ localName: XMLPointer, _ tag: ContiguousArray) return localName[tagCount - 1] == 0 } } + +public extension String { + + init?(xmlPointer: XMLPointer, count: Int? = nil) { + let d = Data(bytes: xmlPointer, count: count ?? strlen(xmlPointer)) + self.init(data: d, encoding: .utf8) + } +} From 0311518d1e7acee14d56c896db48c7a3ddb699d9 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 11:19:35 -0700 Subject: [PATCH 46/88] Continue progress on AtomParser. --- .../FeedParser/Feeds/XML/AtomParser.swift | 91 +++++++++++++++++++ .../FeedParser/Feeds/XML/RSSAuthor.swift | 5 + 2 files changed, 96 insertions(+) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index edc154c8d..fe32232c8 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -72,6 +72,14 @@ private extension AtomParser { static let feed = "feed".utf8CString static let source = "source".utf8CString static let author = "author".utf8CString + static let name = "name".utf8CString + static let email = "email".utf8CString + static let uri = "uri".utf8CString + static let title = "title".utf8CString + } + + func addFeedTitle() { + } func addFeedLink() { @@ -87,6 +95,10 @@ private extension AtomParser { articles.append(article) } + func addArticleElement(_ localName: XMLPointer, _ prefix: XMLPointer?) 
{ + + } + func addXHTMLTag(_ localName: XMLPointer) { guard var xhtmlString else { @@ -182,6 +194,85 @@ extension AtomParser: SAXParserDelegate { public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) { + if SAXEqualTags(localName, XMLName.feed) { + endFeedFound = true + return + } + + if endFeedFound { + return + } + + if parsingXHTML { + + let isContentTag = SAXEqualTags(localName, XMLName.content) + let isSummaryTag = SAXEqualTags(localName, XMLName.summary) + + if parsingArticle && (isContentTag || isSummaryTag) { + + if isContentTag { + currentArticle?.body = xhtmlString + } + + else if isSummaryTag { + if (currentArticle?.body?.count ?? 0) < 1 { + currentArticle?.body = xhtmlString + } + } + } + + if isContentTag || isSummaryTag { + parsingXHTML = false + } + + if var xhtmlString { + if let localNameString = String(xmlPointer: localName) { + xhtmlString.append("") + } + } else { + assertionFailure("xhtmlString must not be nil when parsingXHTML in xmlEndElement.") + } + } + + else if parsingAuthor { + + if SAXEqualTags(localName, XMLName.author) { + parsingAuthor = false + if let currentAuthor, !currentAuthor.isEmpty() { + currentArticle?.addAuthor(currentAuthor) + } + currentAuthor = nil + } + else if SAXEqualTags(localName, XMLName.name) { + currentAuthor?.name = saxParser.currentStringWithTrimmedWhitespace + } + else if SAXEqualTags(localName, XMLName.email) { + currentAuthor?.emailAddress = saxParser.currentStringWithTrimmedWhitespace + } + else if SAXEqualTags(localName, XMLName.uri) { + currentAuthor?.url = saxParser.currentStringWithTrimmedWhitespace + } + } + + else if SAXEqualTags(localName, XMLName.entry) { + parsingArticle = false + } + + else if parsingArticle && !parsingSource { + addArticleElement(localName, prefix) + } + + else if SAXEqualTags(localName, XMLName.source) { + parsingSource = false + } + + else if !parsingArticle && !parsingSource && SAXEqualTags(localName, 
XMLName.title) { + addFeedTitle() + } + + _ = attributesStack.popLast() } public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) { diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift index 9a5e70f88..b0b93a6e3 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift @@ -32,4 +32,9 @@ final class RSSAuthor { self.init(name: singleString) } } + + func isEmpty() -> Bool { + + name != nil || url != nil || avatarURL != nil || emailAddress != nil + } } From 54d2285bff121313afbc17e354092c19716b01c1 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 11:31:04 -0700 Subject: [PATCH 47/88] Add addFeedLink function. --- .../FeedParser/Feeds/XML/AtomParser.swift | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index fe32232c8..2ad654907 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -78,12 +78,31 @@ private extension AtomParser { static let title = "title".utf8CString } - func addFeedTitle() { + private struct XMLString { + static let rel = "rel" + static let alternate = "alternate" + static let href = "href" + } + func addFeedTitle(_ saxParser: SAXParser) { + + guard feed.title == nil else { + return + } + if let title = saxParser.currentStringWithTrimmedWhitespace, !title.isEmpty { + feed.title = title + } } func addFeedLink() { + guard feed.link == nil, let currentAttributes else { + return + } + + if let related = currentAttributes[XMLString.rel], related == XMLString.alternate { + feed.link = currentAttributes[XMLString.href] + } } func addFeedLanguage() { @@ -269,7 +288,7 @@ extension AtomParser: 
SAXParserDelegate { } else if !parsingArticle && !parsingSource && SAXEqualTags(localName, XMLName.title) { - addFeedTitle() + addFeedTitle(saxParser) } _ = attributesStack.popLast() From 613038b0d90f916098d129bff8a01326d4cc18c4 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 14:19:37 -0700 Subject: [PATCH 48/88] Continue progress on AtomParser. --- .../FeedParser/Feeds/XML/AtomParser.swift | 92 ++++++++++++++++++- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 2ad654907..a0aa59da8 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -76,12 +76,33 @@ private extension AtomParser { static let email = "email".utf8CString static let uri = "uri".utf8CString static let title = "title".utf8CString + static let id = "id".utf8CString + static let published = "published".utf8CString + static let updated = "updated".utf8CString + static let issued = "issued".utf8CString + static let modified = "modified".utf8CString } private struct XMLString { static let rel = "rel" static let alternate = "alternate" static let href = "href" + static let xmlLang = "xml:lang" + } + + func currentString(_ saxParser: SAXParser) -> String? { + + saxParser.currentStringWithTrimmedWhitespace + } + + func currentDate(_ saxParser: SAXParser) -> Date? 
{ + + guard let data = saxParser.currentCharacters else { + assertionFailure("Unexpected nil saxParser.currentCharacters in AtomParser.currentDate") + return nil + } + + return DateParser.date(data: data) } func addFeedTitle(_ saxParser: SAXParser) { @@ -89,7 +110,8 @@ private extension AtomParser { guard feed.title == nil else { return } - if let title = saxParser.currentStringWithTrimmedWhitespace, !title.isEmpty { + + if let title = currentString(saxParser), !title.isEmpty { feed.title = title } } @@ -107,6 +129,11 @@ private extension AtomParser { func addFeedLanguage() { + guard feed.language == nil, let currentAttributes else { + return + } + + feed.language = currentAttributes[XMLString.xmlLang] } func addArticle() { @@ -114,7 +141,66 @@ private extension AtomParser { articles.append(article) } - func addArticleElement(_ localName: XMLPointer, _ prefix: XMLPointer?) { + func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) { + + guard prefix == nil else { + return + } + guard let currentArticle else { + assertionFailure("currentArticle must not be nil in AtomParser.addArticleElement") + return + } + + if SAXEqualTags(localName, XMLName.id) { + currentArticle.guid = currentString(saxParser) + } + + else if SAXEqualTags(localName, XMLName.title) { + currentArticle.title = currentString(saxParser) + } + + else if SAXEqualTags(localName, XMLName.content) { + addContent(saxParser, currentArticle) + } + + else if SAXEqualTags(localName, XMLName.summary) { + addSummary(saxParser, currentArticle) + } + + else if SAXEqualTags(localName, XMLName.link) { + addLink(currentArticle) + } + + else if SAXEqualTags(localName, XMLName.published) { + currentArticle.datePublished = currentDate(saxParser) + } + + else if SAXEqualTags(localName, XMLName.updated) { + currentArticle.dateModified = currentDate(saxParser) + } + + // Atom 0.3 dates + else if SAXEqualTags(localName, XMLName.issued) { + if currentArticle.datePublished == nil { + 
currentArticle.datePublished = currentDate(saxParser) + } + } + else if SAXEqualTags(localName, XMLName.modified) { + if currentArticle.dateModified == nil { + currentArticle.dateModified = currentDate(saxParser) + } + } + } + + func addContent(_ saxParser: SAXParser, _ article: RSSArticle) { + + article.body = currentString(saxParser) + } + + func addSummary(_ saxParser: SAXParser, _ article: RSSArticle) { + } + + func addLink(_ article: RSSArticle) { } @@ -280,7 +366,7 @@ extension AtomParser: SAXParserDelegate { } else if parsingArticle && !parsingSource { - addArticleElement(localName, prefix) + addArticleElement(saxParser, localName, prefix) } else if SAXEqualTags(localName, XMLName.source) { From e22c17fd6a1fa6bce3fbc2abeee2dfec73624bf3 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 14:45:51 -0700 Subject: [PATCH 49/88] Continue progress on AtomParser. --- .../FeedParser/Feeds/XML/AtomParser.swift | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index a0aa59da8..9c69d24bc 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -86,7 +86,12 @@ private extension AtomParser { private struct XMLString { static let rel = "rel" static let alternate = "alternate" + static let related = "related" + static let enclosure = "enclosure" static let href = "href" + static let title = "title" + static let type = "type" + static let length = "length" static let xmlLang = "xml:lang" } @@ -202,6 +207,46 @@ private extension AtomParser { func addLink(_ article: RSSArticle) { + guard let attributes = currentAttributes else { + return + } + guard let urlString = attributes[XMLString.href], !urlString.isEmpty else { + return + } + + var rel = attributes[XMLString.rel] + if rel?.isEmpty ?? 
true { + rel = XMLString.alternate + } + + if rel == XMLString.related { + if article.link == nil { + article.link = urlString + } + } + else if rel == XMLString.alternate { + if article.permalink == nil { + article.permalink = urlString + } + } + else if rel == XMLString.enclosure { + if let enclosure = enclosure(urlString, attributes) { + article.addEnclosure(enclosure) + } + } + } + + func enclosure(_ urlString: String, _ attributes: SAXParser.XMLAttributesDictionary) -> RSSEnclosure? { + + let enclosure = RSSEnclosure(url: urlString) + enclosure.title = attributes[XMLString.title] + enclosure.mimeType = attributes[XMLString.type] + + if let lengthString = attributes[XMLString.length] { + enclosure.length = Int(lengthString) + } + + return enclosure } func addXHTMLTag(_ localName: XMLPointer) { From 73ef8f085af5750a7838b5df49f0b4ec06c84433 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 14:49:25 -0700 Subject: [PATCH 50/88] Finish first draft of AtomParser. --- .../Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 9c69d24bc..5d01f2479 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -203,6 +203,11 @@ private extension AtomParser { } func addSummary(_ saxParser: SAXParser, _ article: RSSArticle) { + + guard article.body == nil else { + return + } + article.body = currentString(saxParser) } func addLink(_ article: RSSArticle) { @@ -443,4 +448,3 @@ extension AtomParser: SAXParserDelegate { xhtmlString = s } } - From e1f3da7b0987c6ba2fcb6cc6442873df14d87863 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 14:49:38 -0700 Subject: [PATCH 51/88] Use AtomParser in FeedParser. 
--- Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index c5c272433..125c83532 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -39,11 +39,12 @@ public struct FeedParser { return nil // TODO: try RSSInJSONParser.parse(parserData) case .rss: - let rssFeed = RSSParser.parsedFeed(with: parserData) - return RSSFeedTransformer.parsedFeed(with: rssFeed) + let feed = RSSParser.parsedFeed(with: parserData) + return RSSFeedTransformer.parsedFeed(with: feed) case .atom: - return nil // TODO: AtomParser.parse(parserData) + let feed = AtomParser.parsedFeed(with: parserData) + return RSSFeedTransformer.parsedFeed(with: feed) case .unknown, .notAFeed: return nil From 96cbc51d26269e9171d05f9ee864b3f03dce7c8f Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 15:03:34 -0700 Subject: [PATCH 52/88] Get AtomParserTests running and passing. 
--- .../Sources/FeedParser/Feeds/FeedParser.swift | 4 +- .../FeedParser/Feeds/XML/RSSAuthor.swift | 2 +- .../Feeds/XML/RSSFeedTransformer.swift | 6 +- .../FeedParserTests/AtomParserTests.swift | 192 +++++++++--------- 4 files changed, 102 insertions(+), 102 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index 125c83532..2e60b7dad 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -40,11 +40,11 @@ public struct FeedParser { case .rss: let feed = RSSParser.parsedFeed(with: parserData) - return RSSFeedTransformer.parsedFeed(with: feed) + return RSSFeedTransformer.parsedFeed(with: feed, feedType: .rss) case .atom: let feed = AtomParser.parsedFeed(with: parserData) - return RSSFeedTransformer.parsedFeed(with: feed) + return RSSFeedTransformer.parsedFeed(with: feed, feedType: .atom) case .unknown, .notAFeed: return nil diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift index b0b93a6e3..297470b85 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift @@ -35,6 +35,6 @@ final class RSSAuthor { func isEmpty() -> Bool { - name != nil || url != nil || avatarURL != nil || emailAddress != nil + name == nil && url == nil && avatarURL == nil && emailAddress == nil } } diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift index 4c14907f1..4264a12f5 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift @@ -11,10 +11,10 @@ import Foundation struct RSSFeedTransformer { /// Turn an internal RSSFeed into a public ParsedFeed. 
- static func parsedFeed(with rssFeed: RSSFeed) -> ParsedFeed { + static func parsedFeed(with feed: RSSFeed, feedType: FeedType) -> ParsedFeed { - let items = parsedItems(rssFeed.articles) - return ParsedFeed(type: .rss, title: rssFeed.title, homePageURL: rssFeed.link, feedURL: rssFeed.urlString, language: rssFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) + let items = parsedItems(feed.articles) + return ParsedFeed(type: feedType, title: feed.title, homePageURL: feed.link, feedURL: feed.urlString, language: feed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) } } diff --git a/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift b/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift index 71647f10f..3a83ba994 100644 --- a/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift @@ -9,99 +9,99 @@ import XCTest import FeedParser -//class AtomParserTests: XCTestCase { -// -// func testDaringFireballPerformance() { -// -// // 0.009 sec on my 2012 iMac. -// let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed -// self.measure { -// let _ = try! FeedParser.parseSync(d) -// } -// } -// -// func testAllThisPerformance() { -// -// // 0.003 sec on my 2012 iMac. -// let d = parserData("allthis", "atom", "http://leancrew.com/all-this") -// self.measure { -// let _ = try! FeedParser.parseSync(d) -// } -// } -// -// func testGettingHomePageLink() async { -// -// let d = parserData("allthis", "atom", "http://leancrew.com/all-this") -// let parsedFeed = try! await FeedParser.parse(d)! 
-// -// XCTAssertTrue(parsedFeed.homePageURL == "http://leancrew.com/all-this") -// } -// -// func testDaringFireball() async { -// -// let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed -// let parsedFeed = try! await FeedParser.parse(d)! -// -// for article in parsedFeed.items { -// -// XCTAssertNotNil(article.url) -// -// XCTAssertTrue(article.uniqueID.hasPrefix("tag:daringfireball.net,2017:/")) -// -// XCTAssertEqual(article.authors!.count, 1) // TODO: parse Atom authors -// let author = article.authors!.first! -// if author.name == "Daring Fireball Department of Commerce" { -// XCTAssertNil(author.url) -// } -// else { -// XCTAssertEqual(author.name, "John Gruber") -// XCTAssertEqual(author.url, "http://daringfireball.net/") -// } -// -// XCTAssertNotNil(article.datePublished) -// XCTAssert(article.attachments == nil) -// -// XCTAssertEqual(article.language, "en") -// } -// } -// -// func test4fsodonlineAttachments() async { -// -// // Thanks to Marco for finding me some Atom podcast feeds. Apparently they’re super-rare. -// -// let d = parserData("4fsodonline", "atom", "http://4fsodonline.blogspot.com/") -// let parsedFeed = try! await FeedParser.parse(d)! -// -// for article in parsedFeed.items { -// -// XCTAssertTrue(article.attachments!.count > 0) -// let attachment = article.attachments!.first! -// -// XCTAssertTrue(attachment.url.hasPrefix("http://www.blogger.com/video-play.mp4?")) -// XCTAssertNil(attachment.sizeInBytes) -// XCTAssertEqual(attachment.mimeType!, "video/mp4") -// } -// } -// -// func testExpertOpinionENTAttachments() async { -// -// // Another from Marco. -// -// let d = parserData("expertopinionent", "atom", "http://expertopinionent.typepad.com/my-blog/") -// let parsedFeed = try! await FeedParser.parse(d)! 
-// -// for article in parsedFeed.items { -// -// guard let attachments = article.attachments else { -// continue -// } -// -// XCTAssertEqual(attachments.count, 1) -// let attachment = attachments.first! -// -// XCTAssertTrue(attachment.url.hasSuffix(".mp3")) -// XCTAssertNil(attachment.sizeInBytes) -// XCTAssertEqual(attachment.mimeType!, "audio/mpeg") -// } -// } -//} +class AtomParserTests: XCTestCase { + + func testDaringFireballPerformance() { + + // 0.009 sec on my 2012 iMac. + let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed + self.measure { + let _ = try! FeedParser.parse(d) + } + } + + func testAllThisPerformance() { + + // 0.003 sec on my 2012 iMac. + let d = parserData("allthis", "atom", "http://leancrew.com/all-this") + self.measure { + let _ = try! FeedParser.parse(d) + } + } + + func testGettingHomePageLink() { + + let d = parserData("allthis", "atom", "http://leancrew.com/all-this") + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertTrue(parsedFeed.homePageURL == "http://leancrew.com/all-this") + } + + func testDaringFireball() { + + let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed + let parsedFeed = try! FeedParser.parse(d)! + + for article in parsedFeed.items { + + XCTAssertNotNil(article.url) + + XCTAssertTrue(article.uniqueID.hasPrefix("tag:daringfireball.net,2017:/")) + + XCTAssertEqual(article.authors!.count, 1) // TODO: parse Atom authors + let author = article.authors!.first! + if author.name == "Daring Fireball Department of Commerce" { + XCTAssertNil(author.url) + } + else { + XCTAssertEqual(author.name, "John Gruber") + XCTAssertEqual(author.url, "http://daringfireball.net/") + } + + XCTAssertNotNil(article.datePublished) + XCTAssert(article.attachments == nil) + + XCTAssertEqual(article.language, "en") + } + } + + func test4fsodonlineAttachments() { + + // Thanks to Marco for finding me some Atom podcast feeds. 
Apparently they’re super-rare. + + let d = parserData("4fsodonline", "atom", "http://4fsodonline.blogspot.com/") + let parsedFeed = try! FeedParser.parse(d)! + + for article in parsedFeed.items { + + XCTAssertTrue(article.attachments!.count > 0) + let attachment = article.attachments!.first! + + XCTAssertTrue(attachment.url.hasPrefix("http://www.blogger.com/video-play.mp4?")) + XCTAssertNil(attachment.sizeInBytes) + XCTAssertEqual(attachment.mimeType!, "video/mp4") + } + } + + func testExpertOpinionENTAttachments() { + + // Another from Marco. + + let d = parserData("expertopinionent", "atom", "http://expertopinionent.typepad.com/my-blog/") + let parsedFeed = try! FeedParser.parse(d)! + + for article in parsedFeed.items { + + guard let attachments = article.attachments else { + continue + } + + XCTAssertEqual(attachments.count, 1) + let attachment = attachments.first! + + XCTAssertTrue(attachment.url.hasSuffix(".mp3")) + XCTAssertNil(attachment.sizeInBytes) + XCTAssertEqual(attachment.mimeType!, "audio/mpeg") + } + } +} From be09d2cda2be0b974c4a9c4f77782a932b1eacec Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 15:40:27 -0700 Subject: [PATCH 53/88] Start work on HTMLEntityDecoder port. --- .../Sources/SAX/HTMLEntityDecoder.swift | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift new file mode 100644 index 000000000..5f37fe593 --- /dev/null +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -0,0 +1,51 @@ +// +// HTMLEntityDecoder.swift +// +// +// Created by Brent Simmons on 9/14/24. 
+// + +import Foundation + +public final class HTMLEntityDecoder { + + static func decodedString(withEncodedString encodedString: String) -> String { + + let scanner = Scanner(string: encodedString) + scanner.charactersToBeSkipped = nil + var result = "" + var didDecodeAtLeastOneEntity = false + + while true { + + var scannedString = nil + if scanner.scanUpToString("&" intoString: &scannedString) { + result.append(scannedString) + } + if scanner.isAtEnd { + break + } + + let savedScanLocation = scanner.scanLocation + + var decodedEntity: String? = nil + if scanner.scanEntityValue(&decodedEntity) { + result.append(decodedEntity) + didDecodeAtLeastOneEntity = true + } + else { + result.append("&") + scanner.scanLocation = savedScanLocation + 1 + } + + if scanner.isAtEnd { + break + } + } + + if !didDecodeAtLeastOneEntity { // No changes made? + return encodedString + } + return result + } +} From d5a7baf53fe3a0d04ccfb411bf9a293851bb71ca Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 14 Sep 2024 15:40:46 -0700 Subject: [PATCH 54/88] Uncomment JSONFeedParser. --- .../Sources/DateParser/DateParser.swift | 8 + .../Feeds/JSON/JSONFeedParser.swift | 491 +++++++++--------- 2 files changed, 254 insertions(+), 245 deletions(-) diff --git a/Modules/Parser/Sources/DateParser/DateParser.swift b/Modules/Parser/Sources/DateParser/DateParser.swift index 4c9f9271d..f2d5d0ac4 100644 --- a/Modules/Parser/Sources/DateParser/DateParser.swift +++ b/Modules/Parser/Sources/DateParser/DateParser.swift @@ -38,6 +38,14 @@ public final class DateParser { } } + public static func date(string: String) -> Date? 
{ + + guard let data = string.data(using: .utf8) else { + return nil + } + return date(data: data) + } + private typealias DateBuffer = UnsafeBufferPointer // See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift index 723ec1afb..17c9d7eb2 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift @@ -1,248 +1,249 @@ -//// -//// JSONFeedParser.swift -//// RSParser -//// -//// Created by Brent Simmons on 6/25/17. -//// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -//// // -//import Foundation -//import SAX +// JSONFeedParser.swift +// RSParser // -//// See https://jsonfeed.org/version/1.1 +// Created by Brent Simmons on 6/25/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. // -//public struct JSONFeedParser { -// -// struct Key { -// static let version = "version" -// static let items = "items" -// static let title = "title" -// static let homePageURL = "home_page_url" -// static let feedURL = "feed_url" -// static let feedDescription = "description" -// static let nextURL = "next_url" -// static let icon = "icon" -// static let favicon = "favicon" -// static let expired = "expired" -// static let author = "author" -// static let authors = "authors" -// static let name = "name" -// static let url = "url" -// static let avatar = "avatar" -// static let hubs = "hubs" -// static let type = "type" -// static let contentHTML = "content_html" -// static let contentText = "content_text" -// static let externalURL = "external_url" -// static let summary = "summary" -// static let image = "image" -// static let bannerImage = "banner_image" -// static let datePublished = "date_published" -// static let dateModified = "date_modified" -// static let tags = "tags" -// static let uniqueID 
= "id" -// static let attachments = "attachments" -// static let mimeType = "mime_type" -// static let sizeInBytes = "size_in_bytes" -// static let durationInSeconds = "duration_in_seconds" -// static let language = "language" -// } -// -// static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct. -// -// public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { -// -// guard let d = JSONUtilities.dictionary(with: parserData.data) else { -// throw FeedParserError(.invalidJSON) -// } -// -// guard let version = d[Key.version] as? String, let _ = version.range(of: JSONFeedParser.jsonFeedVersionMarker) else { -// throw FeedParserError(.jsonFeedVersionNotFound) -// } -// guard let itemsArray = d[Key.items] as? JSONArray else { -// throw FeedParserError(.jsonFeedItemsNotFound) -// } -// guard let title = d[Key.title] as? String else { -// throw FeedParserError(.jsonFeedTitleNotFound) -// } -// -// let authors = parseAuthors(d) -// let homePageURL = d[Key.homePageURL] as? String -// let feedURL = d[Key.feedURL] as? String ?? parserData.url -// let feedDescription = d[Key.feedDescription] as? String -// let nextURL = d[Key.nextURL] as? String -// let iconURL = d[Key.icon] as? String -// let faviconURL = d[Key.favicon] as? String -// let expired = d[Key.expired] as? Bool ?? false -// let hubs = parseHubs(d) -// let language = d[Key.language] as? String -// -// let items = parseItems(itemsArray, parserData.url) -// -// return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items) -// } -//} -// -//private extension JSONFeedParser { -// -// static func parseAuthors(_ dictionary: JSONDictionary) -> Set? { -// -// if let authorsArray = dictionary[Key.authors] as? 
JSONArray { -// var authors = Set() -// for author in authorsArray { -// if let parsedAuthor = parseAuthor(author) { -// authors.insert(parsedAuthor) -// } -// } -// return authors -// } -// -// guard let authorDictionary = dictionary[Key.author] as? JSONDictionary, -// let parsedAuthor = parseAuthor(authorDictionary) else { -// return nil -// } -// -// return Set([parsedAuthor]) -// } -// -// static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? { -// let name = dictionary[Key.name] as? String -// let url = dictionary[Key.url] as? String -// let avatar = dictionary[Key.avatar] as? String -// if name == nil && url == nil && avatar == nil { -// return nil -// } -// return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil) -// } -// -// static func parseHubs(_ dictionary: JSONDictionary) -> Set? { -// -// guard let hubsArray = dictionary[Key.hubs] as? JSONArray else { -// return nil -// } -// -// let hubs = hubsArray.compactMap { (hubDictionary) -> ParsedHub? in -// guard let hubURL = hubDictionary[Key.url] as? String, let hubType = hubDictionary[Key.type] as? String else { -// return nil -// } -// return ParsedHub(type: hubType, url: hubURL) -// } -// return hubs.isEmpty ? nil : Set(hubs) -// } -// -// static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set { -// -// return Set(itemsArray.compactMap { (oneItemDictionary) -> ParsedItem? in -// return parseItem(oneItemDictionary, feedURL) -// }) -// } -// -// static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { -// -// guard let uniqueID = parseUniqueID(itemDictionary) else { -// return nil -// } -// -// let contentHTML = itemDictionary[Key.contentHTML] as? String -// let contentText = itemDictionary[Key.contentText] as? String -// if contentHTML == nil && contentText == nil { -// return nil -// } -// -// let url = itemDictionary[Key.url] as? String -// let externalURL = itemDictionary[Key.externalURL] as? 
String -// let title = parseTitle(itemDictionary, feedURL) -// let language = itemDictionary[Key.language] as? String -// let summary = itemDictionary[Key.summary] as? String -// let imageURL = itemDictionary[Key.image] as? String -// let bannerImageURL = itemDictionary[Key.bannerImage] as? String -// -// let datePublished = parseDate(itemDictionary[Key.datePublished] as? String) -// let dateModified = parseDate(itemDictionary[Key.dateModified] as? String) -// -// let authors = parseAuthors(itemDictionary) -// var tags: Set? = nil -// if let tagsArray = itemDictionary[Key.tags] as? [String] { -// tags = Set(tagsArray) -// } -// let attachments = parseAttachments(itemDictionary) -// -// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments) -// } -// -// static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? { -// -// guard let title = itemDictionary[Key.title] as? String else { -// return nil -// } -// -// if isSpecialCaseTitleWithEntitiesFeed(feedURL) { -// return (title as NSString).rsparser_stringByDecodingHTMLEntities() -// } -// -// return title -// } -// -// static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool { -// -// // As of 16 Feb. 2018, Kottke’s and Heer’s feeds includes HTML entities in the title elements. -// // If we find more feeds like this, we’ll add them here. If these feeds get fixed, we’ll remove them. 
-// -// let lowerFeedURL = feedURL.lowercased() -// let matchStrings = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"] -// for matchString in matchStrings { -// if lowerFeedURL.contains(matchString) { -// return true -// } -// } -// -// return false -// } -// -// static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? { -// -// if let uniqueID = itemDictionary[Key.uniqueID] as? String { -// return uniqueID // Spec says it must be a string -// } -// // Version 1 spec also says that if it’s a number, even though that’s incorrect, it should be coerced to a string. -// if let uniqueID = itemDictionary[Key.uniqueID] as? Int { -// return "\(uniqueID)" -// } -// if let uniqueID = itemDictionary[Key.uniqueID] as? Double { -// return "\(uniqueID)" -// } -// return nil -// } -// -// static func parseDate(_ dateString: String?) -> Date? { -// -// guard let dateString = dateString, !dateString.isEmpty else { -// return nil -// } -// return RSDateWithString(dateString) -// } -// -// static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { -// -// guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else { -// return nil -// } -// return Set(attachmentsArray.compactMap { parseAttachment($0) }) -// } -// -// static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? { -// -// guard let url = attachmentObject[Key.url] as? String else { -// return nil -// } -// guard let mimeType = attachmentObject[Key.mimeType] as? String else { -// return nil -// } -// -// let title = attachmentObject[Key.title] as? String -// let sizeInBytes = attachmentObject[Key.sizeInBytes] as? Int -// let durationInSeconds = attachmentObject[Key.durationInSeconds] as? 
Int -// -// return ParsedAttachment(url: url, mimeType: mimeType, title: title, sizeInBytes: sizeInBytes, durationInSeconds: durationInSeconds) -// } -//} + +import Foundation +import SAX +import DateParser + +// See https://jsonfeed.org/version/1.1 + +public struct JSONFeedParser { + + struct Key { + static let version = "version" + static let items = "items" + static let title = "title" + static let homePageURL = "home_page_url" + static let feedURL = "feed_url" + static let feedDescription = "description" + static let nextURL = "next_url" + static let icon = "icon" + static let favicon = "favicon" + static let expired = "expired" + static let author = "author" + static let authors = "authors" + static let name = "name" + static let url = "url" + static let avatar = "avatar" + static let hubs = "hubs" + static let type = "type" + static let contentHTML = "content_html" + static let contentText = "content_text" + static let externalURL = "external_url" + static let summary = "summary" + static let image = "image" + static let bannerImage = "banner_image" + static let datePublished = "date_published" + static let dateModified = "date_modified" + static let tags = "tags" + static let uniqueID = "id" + static let attachments = "attachments" + static let mimeType = "mime_type" + static let sizeInBytes = "size_in_bytes" + static let durationInSeconds = "duration_in_seconds" + static let language = "language" + } + + static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct. + + public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + + guard let d = JSONUtilities.dictionary(with: parserData.data) else { + throw FeedParserError(.invalidJSON) + } + + guard let version = d[Key.version] as? String, let _ = version.range(of: JSONFeedParser.jsonFeedVersionMarker) else { + throw FeedParserError(.jsonFeedVersionNotFound) + } + guard let itemsArray = d[Key.items] as? 
JSONArray else { + throw FeedParserError(.jsonFeedItemsNotFound) + } + guard let title = d[Key.title] as? String else { + throw FeedParserError(.jsonFeedTitleNotFound) + } + + let authors = parseAuthors(d) + let homePageURL = d[Key.homePageURL] as? String + let feedURL = d[Key.feedURL] as? String ?? parserData.url + let feedDescription = d[Key.feedDescription] as? String + let nextURL = d[Key.nextURL] as? String + let iconURL = d[Key.icon] as? String + let faviconURL = d[Key.favicon] as? String + let expired = d[Key.expired] as? Bool ?? false + let hubs = parseHubs(d) + let language = d[Key.language] as? String + + let items = parseItems(itemsArray, parserData.url) + + return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items) + } +} + +private extension JSONFeedParser { + + static func parseAuthors(_ dictionary: JSONDictionary) -> Set? { + + if let authorsArray = dictionary[Key.authors] as? JSONArray { + var authors = Set() + for author in authorsArray { + if let parsedAuthor = parseAuthor(author) { + authors.insert(parsedAuthor) + } + } + return authors + } + + guard let authorDictionary = dictionary[Key.author] as? JSONDictionary, + let parsedAuthor = parseAuthor(authorDictionary) else { + return nil + } + + return Set([parsedAuthor]) + } + + static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? { + let name = dictionary[Key.name] as? String + let url = dictionary[Key.url] as? String + let avatar = dictionary[Key.avatar] as? String + if name == nil && url == nil && avatar == nil { + return nil + } + return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil) + } + + static func parseHubs(_ dictionary: JSONDictionary) -> Set? { + + guard let hubsArray = dictionary[Key.hubs] as? 
JSONArray else { + return nil + } + + let hubs = hubsArray.compactMap { (hubDictionary) -> ParsedHub? in + guard let hubURL = hubDictionary[Key.url] as? String, let hubType = hubDictionary[Key.type] as? String else { + return nil + } + return ParsedHub(type: hubType, url: hubURL) + } + return hubs.isEmpty ? nil : Set(hubs) + } + + static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set { + + return Set(itemsArray.compactMap { (oneItemDictionary) -> ParsedItem? in + return parseItem(oneItemDictionary, feedURL) + }) + } + + static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { + + guard let uniqueID = parseUniqueID(itemDictionary) else { + return nil + } + + let contentHTML = itemDictionary[Key.contentHTML] as? String + let contentText = itemDictionary[Key.contentText] as? String + if contentHTML == nil && contentText == nil { + return nil + } + + let url = itemDictionary[Key.url] as? String + let externalURL = itemDictionary[Key.externalURL] as? String + let title = parseTitle(itemDictionary, feedURL) + let language = itemDictionary[Key.language] as? String + let summary = itemDictionary[Key.summary] as? String + let imageURL = itemDictionary[Key.image] as? String + let bannerImageURL = itemDictionary[Key.bannerImage] as? String + + let datePublished = parseDate(itemDictionary[Key.datePublished] as? String) + let dateModified = parseDate(itemDictionary[Key.dateModified] as? String) + + let authors = parseAuthors(itemDictionary) + var tags: Set? = nil + if let tagsArray = itemDictionary[Key.tags] as? 
[String] { + tags = Set(tagsArray) + } + let attachments = parseAttachments(itemDictionary) + + return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments) + } + + static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? { + + guard let title = itemDictionary[Key.title] as? String else { + return nil + } + + if isSpecialCaseTitleWithEntitiesFeed(feedURL) { + return (title as NSString).rsparser_stringByDecodingHTMLEntities() + } + + return title + } + + static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool { + + // As of 16 Feb. 2018, Kottke’s and Heer’s feeds includes HTML entities in the title elements. + // If we find more feeds like this, we’ll add them here. If these feeds get fixed, we’ll remove them. + + let lowerFeedURL = feedURL.lowercased() + let matchStrings = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"] + for matchString in matchStrings { + if lowerFeedURL.contains(matchString) { + return true + } + } + + return false + } + + static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? { + + if let uniqueID = itemDictionary[Key.uniqueID] as? String { + return uniqueID // Spec says it must be a string + } + // Version 1 spec also says that if it’s a number, even though that’s incorrect, it should be coerced to a string. + if let uniqueID = itemDictionary[Key.uniqueID] as? Int { + return "\(uniqueID)" + } + if let uniqueID = itemDictionary[Key.uniqueID] as? Double { + return "\(uniqueID)" + } + return nil + } + + static func parseDate(_ dateString: String?) -> Date? 
{ + + guard let dateString = dateString, !dateString.isEmpty else { + return nil + } + return DateParser.date(string: dateString) + } + + static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { + + guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else { + return nil + } + return Set(attachmentsArray.compactMap { parseAttachment($0) }) + } + + static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? { + + guard let url = attachmentObject[Key.url] as? String else { + return nil + } + guard let mimeType = attachmentObject[Key.mimeType] as? String else { + return nil + } + + let title = attachmentObject[Key.title] as? String + let sizeInBytes = attachmentObject[Key.sizeInBytes] as? Int + let durationInSeconds = attachmentObject[Key.durationInSeconds] as? Int + + return ParsedAttachment(url: url, mimeType: mimeType, title: title, sizeInBytes: sizeInBytes, durationInSeconds: durationInSeconds) + } +} From f835182bc667a0c81093945cdce89b41c844014a Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 15 Sep 2024 14:26:01 -0700 Subject: [PATCH 55/88] Start HTMLEntityDecoded. --- .../Sources/SAX/HTMLEntityDecoder.swift | 46 +++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift index 5f37fe593..4dd12242b 100644 --- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -18,8 +18,8 @@ public final class HTMLEntityDecoder { while true { - var scannedString = nil - if scanner.scanUpToString("&" intoString: &scannedString) { + var scannedString: NSString? 
= nil + if scanner.scanUpTo("&", into: &scannedString) { result.append(scannedString) } if scanner.isAtEnd { @@ -37,7 +37,7 @@ public final class HTMLEntityDecoder { result.append("&") scanner.scanLocation = savedScanLocation + 1 } - + if scanner.isAtEnd { break } @@ -49,3 +49,43 @@ public final class HTMLEntityDecoder { return result } } + +/// Purpose-built version of NSScanner, which has deprecated the parts we want to use. +final class RSScanner { + + let string: String + let count: Int + var scanLocation = 0 + + var isAtEnd { + scanLocation >= count - 1 + } + + init(string: String) { + self.string = string + self.count = string.count + } + + /// Scans up to `characterToFind` and returns the characters up to (and not including) `characterToFind`. + /// - Returns: nil when there were no characters accumulated (next character was `characterToFind` or already at end of string) + func scanUpTo(_ characterToFind: Character) -> String? { + + if isAtEnd { + return nil + } + + while true { + + + } + } + + private func currentCharacter() -> Character? { + + + + } + + private func + +} From 26d0a19c8b2ac92dd173d99514c7b04c5e028582 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 15 Sep 2024 20:43:45 -0700 Subject: [PATCH 56/88] Continue progress on HTMLEntityDecoder. 
--- .../Sources/SAX/HTMLEntityDecoder.swift | 102 +++++++++++++----- 1 file changed, 78 insertions(+), 24 deletions(-) diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift index 4dd12242b..dee3fca5b 100644 --- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -11,15 +11,14 @@ public final class HTMLEntityDecoder { static func decodedString(withEncodedString encodedString: String) -> String { - let scanner = Scanner(string: encodedString) - scanner.charactersToBeSkipped = nil + let scanner = EntityScanner(string: encodedString) var result = "" var didDecodeAtLeastOneEntity = false while true { - var scannedString: NSString? = nil - if scanner.scanUpTo("&", into: &scannedString) { + let scannedString = scanner.scanUpTo(Character("&")) + if !scannedString.isEmpty { result.append(scannedString) } if scanner.isAtEnd { @@ -28,8 +27,7 @@ public final class HTMLEntityDecoder { let savedScanLocation = scanner.scanLocation - var decodedEntity: String? = nil - if scanner.scanEntityValue(&decodedEntity) { + if let decodedEntity = scanner.scanEntityValue() { result.append(decodedEntity) didDecodeAtLeastOneEntity = true } @@ -43,7 +41,7 @@ public final class HTMLEntityDecoder { } } - if !didDecodeAtLeastOneEntity { // No changes made? + if !didDecodeAtLeastOneEntity { // No entities decoded? return encodedString } return result @@ -51,14 +49,21 @@ public final class HTMLEntityDecoder { } /// Purpose-built version of NSScanner, which has deprecated the parts we want to use. -final class RSScanner { +final class EntityScanner { let string: String let count: Int var scanLocation = 0 - var isAtEnd { - scanLocation >= count - 1 + var isAtEnd: Bool { + scanLocation >= count + } + + var currentCharacter: Character? 
{ + guard !isAtEnd, let index = string.index(string.startIndex, offsetBy: scanLocation, limitedBy: string.endIndex) else { + return nil + } + return string[index] } init(string: String) { @@ -67,25 +72,74 @@ final class RSScanner { } /// Scans up to `characterToFind` and returns the characters up to (and not including) `characterToFind`. - /// - Returns: nil when there were no characters accumulated (next character was `characterToFind` or already at end of string) - func scanUpTo(_ characterToFind: Character) -> String? { + /// - Returns: the scanned portion before `characterToFind`. May be empty string. + func scanUpTo(_ characterToFind: Character) -> String { - if isAtEnd { - return nil - } + var scanned = "" while true { + guard let ch = currentCharacter else { + break + } + scanLocation += 1 + + if ch == characterToFind { + break + } + else { + scanned.append(ch) + } + } + + return scanned + } + +// - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { +// +// NSString *s = self.string; +// NSUInteger initialScanLocation = self.scanLocation; +// static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. +// +// while (true) { +// +// unichar ch = [s characterAtIndex:self.scanLocation]; +// if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { +// break; +// } +// if (ch == ';') { +// if (!decodedEntity) { +// return YES; +// } +// NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)]; +// *decodedEntity = [rawEntity rs_stringByDecodingEntity]; +// self.scanLocation = self.scanLocation + 1; +// return *decodedEntity != nil; +// } +// +// self.scanLocation = self.scanLocation + 1; +// if (self.scanLocation - initialScanLocation > maxEntityLength) { +// break; +// } +// if (self.isAtEnd) { +// break; +// } +// } +// +// return NO; +// } + + func scanEntityValue() -> String? 
{ + + let initialScanLocation = scanLocation + let maxEntityLength = 20 // It’s probably smaller, but this is just for sanity. + + while true { + + guard let ch = currentCharacter } + + return nil } - - private func currentCharacter() -> Character? { - - - - } - - private func - } From 6779ef94dd04caae5e7b61c6877b729a97f4f076 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 15 Sep 2024 21:51:48 -0700 Subject: [PATCH 57/88] Continue progress on HTMLEntityDecoder. --- .../Feeds/JSON/JSONFeedParser.swift | 2 +- .../Sources/SAX/HTMLEntityDecoder.swift | 103 +++++++++++------- 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift index 17c9d7eb2..36cc1f23d 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift @@ -178,7 +178,7 @@ private extension JSONFeedParser { } if isSpecialCaseTitleWithEntitiesFeed(feedURL) { - return (title as NSString).rsparser_stringByDecodingHTMLEntities() + return HTMLEntityDecoder.decodedString(title) } return title diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift index dee3fca5b..70fdf5d3a 100644 --- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -9,7 +9,9 @@ import Foundation public final class HTMLEntityDecoder { - static func decodedString(withEncodedString encodedString: String) -> String { + static let ampersandCharacter = Character("&") + + public static func decodedString(_ encodedString: String) -> String { let scanner = EntityScanner(string: encodedString) var result = "" @@ -17,7 +19,7 @@ public final class HTMLEntityDecoder { while true { - let scannedString = scanner.scanUpTo(Character("&")) + let scannedString = scanner.scanUpTo(Self.ampersandCharacter) 
if !scannedString.isEmpty { result.append(scannedString) } @@ -60,10 +62,10 @@ final class EntityScanner { } var currentCharacter: Character? { - guard !isAtEnd, let index = string.index(string.startIndex, offsetBy: scanLocation, limitedBy: string.endIndex) else { + guard !isAtEnd else { return nil } - return string[index] + return string.characterAtIntIndex(scanLocation) } init(string: String) { @@ -95,39 +97,7 @@ final class EntityScanner { return scanned } -// - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { -// -// NSString *s = self.string; -// NSUInteger initialScanLocation = self.scanLocation; -// static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. -// -// while (true) { -// -// unichar ch = [s characterAtIndex:self.scanLocation]; -// if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { -// break; -// } -// if (ch == ';') { -// if (!decodedEntity) { -// return YES; -// } -// NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)]; -// *decodedEntity = [rawEntity rs_stringByDecodingEntity]; -// self.scanLocation = self.scanLocation + 1; -// return *decodedEntity != nil; -// } -// -// self.scanLocation = self.scanLocation + 1; -// if (self.scanLocation - initialScanLocation > maxEntityLength) { -// break; -// } -// if (self.isAtEnd) { -// break; -// } -// } -// -// return NO; -// } + static let semicolonCharacter = Character(";") func scanEntityValue() -> String? { @@ -136,10 +106,67 @@ final class EntityScanner { while true { - guard let ch = currentCharacter + guard let ch = currentCharacter else { + break + } + if CharacterSet.whitespacesAndNewlines.contains(ch.unicodeScalars.first!) { + break + } + if ch == Self.semicolonCharacter { + let entityRange = initialScanLocation.. 
maxEntityLength { + break + } + if isAtEnd { + break + } } return nil } } + +extension String { + + func indexForInt(_ i: Int) -> Index? { + + index(startIndex, offsetBy: i, limitedBy: endIndex) + } + + func characterAtIntIndex(_ i: Int) -> Character? { + + guard let index = indexForInt(i) else { + return nil + } + + return self[index] + } + + func substring(intRange: Range) -> String? { + + guard let rangeLower = indexForInt(intRange.lowerBound) else { + return nil + } + guard let rangeUpper = indexForInt(intRange.upperBound) else { + return nil + } + + return String(self[rangeLower.. String? { + + return nil +} From 8e4e859071b0dc50fea7f31e14b555e3295bbe4d Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 15 Sep 2024 21:59:07 -0700 Subject: [PATCH 58/88] Add comment to decodedEntity. --- Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift index 70fdf5d3a..78dbc540f 100644 --- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -166,6 +166,7 @@ extension String { } } +/// rawEntity is assumed not to have opening `&` and closing `;`. private func decodedEntity(_ rawEntity: String) -> String? { return nil From e315820b477c00913801435efece3e2d14f3690c Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 16 Sep 2024 21:56:55 -0700 Subject: [PATCH 59/88] Continue progress on HTMLEntityDecoder. 
--- .../Sources/SAX/HTMLEntityDecoder.swift | 188 +++++++++++++++++- 1 file changed, 182 insertions(+), 6 deletions(-) diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift index 78dbc540f..2c629565f 100644 --- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -9,8 +9,6 @@ import Foundation public final class HTMLEntityDecoder { - static let ampersandCharacter = Character("&") - public static func decodedString(_ encodedString: String) -> String { let scanner = EntityScanner(string: encodedString) @@ -19,7 +17,7 @@ public final class HTMLEntityDecoder { while true { - let scannedString = scanner.scanUpTo(Self.ampersandCharacter) + let scannedString = scanner.scanUpToAmpersand() if !scannedString.isEmpty { result.append(scannedString) } @@ -73,12 +71,15 @@ final class EntityScanner { self.count = string.count } + static let ampersandCharacter = Character("&") + /// Scans up to `characterToFind` and returns the characters up to (and not including) `characterToFind`. /// - Returns: the scanned portion before `characterToFind`. May be empty string. - func scanUpTo(_ characterToFind: Character) -> String { + func scanUpToAmpersand() -> String { + let characterToFind = Self.ampersandCharacter var scanned = "" - + while true { guard let ch = currentCharacter else { @@ -166,8 +167,183 @@ extension String { } } -/// rawEntity is assumed not to have opening `&` and closing `;`. +/// rawEntity may or may not have leading `&` and/or trailing `;` characters. private func decodedEntity(_ rawEntity: String) -> String? 
{ + var s = rawEntity + + if s.hasPrefix("&") { + s.removeFirst() + } + if s.hasSuffix(";") { + s.removeLast() + } + + if let decodedEntity = entitiesDictionary[s] { + return decodedEntity + } + + if s.hasPrefix("#x") || s.hasPrefix("#X") { // Hex + let scanner = Scanner(string: s) + scanner.charactersToBeSkipped = CharacterSet(charactersIn: "#xX") + var hexValue: UInt64 = 0 + if scanner.scanHexInt64(&hexValue) { + return stringWithValue(UInt32(hexValue)) + } + return nil + } + + else if s.hasPrefix("#") { + s.removeFirst() + guard let value = UInt32(s), value >= 1 else { + return nil + } + return stringWithValue(value) + } + return nil } + +private func stringWithValue(_ value: UInt32) -> String? { + + // From WebCore's HTMLEntityParser + let windowsLatin1ExtensionArray: [UInt32] = [ + 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F + 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F + ] + + var modifiedValue = value + + if (modifiedValue & ~0x1F) == 0x80 { // value >= 128 && value < 160 + modifiedValue = windowsLatin1ExtensionArray[Int(modifiedValue - 0x80)] + } + + modifiedValue = CFSwapInt32HostToLittle(modifiedValue) + + let data = Data(bytes: &modifiedValue, count: MemoryLayout.size(ofValue: modifiedValue)) + + return String(data: data, encoding: .utf32LittleEndian) +} + +private let entitiesDictionary = + [ + "AElig": "Æ", + "Aacute": "Á", + "Acirc": "Â", + "Agrave": "À", + "Aring": "Å", + "Atilde": "Ã", + "Auml": "Ä", + "Ccedil": "Ç", + "Dstrok": "Ð", + "ETH": "Ð", + "Eacute": "É", + "Ecirc": "Ê", + "Egrave": "È", + "Euml": "Ë", + "Iacute": "Í", + "Icirc": "Î", + "Igrave": "Ì", + "Iuml": "Ï", + "Ntilde": "Ñ", + "Oacute": "Ó", + "Ocirc": "Ô", + "Ograve": "Ò", + "Oslash": "Ø", + "Otilde": "Õ", + "Ouml": "Ö", + "Pi": "Π", + "THORN": "Þ", + "Uacute": "Ú", + "Ucirc": 
"Û", + "Ugrave": "Ù", + "Uuml": "Ü", + "Yacute": "Y", + "aacute": "á", + "acirc": "â", + "acute": "´", + "aelig": "æ", + "agrave": "à", + "amp": "&", + "apos": "'", + "aring": "å", + "atilde": "ã", + "auml": "ä", + "brkbar": "¦", + "brvbar": "¦", + "ccedil": "ç", + "cedil": "¸", + "cent": "¢", + "copy": "©", + "curren": "¤", + "deg": "°", + "die": "¨", + "divide": "÷", + "eacute": "é", + "ecirc": "ê", + "egrave": "è", + "eth": "ð", + "euml": "ë", + "euro": "€", + "frac12": "½", + "frac14": "¼", + "frac34": "¾", + "gt": ">", + "hearts": "♥", + "hellip": "…", + "iacute": "í", + "icirc": "î", + "iexcl": "¡", + "igrave": "ì", + "iquest": "¿", + "iuml": "ï", + "laquo": "«", + "ldquo": "“", + "lsquo": "‘", + "lt": "<", + "macr": "¯", + "mdash": "—", + "micro": "µ", + "middot": "·", + "ndash": "–", + "not": "¬", + "ntilde": "ñ", + "oacute": "ó", + "ocirc": "ô", + "ograve": "ò", + "ordf": "ª", + "ordm": "º", + "oslash": "ø", + "otilde": "õ", + "ouml": "ö", + "para": "¶", + "pi": "π", + "plusmn": "±", + "pound": "£", + "quot": "\"", + "raquo": "»", + "rdquo": "”", + "reg": "®", + "rsquo": "’", + "sect": "§", + "shy": stringWithValue(173), + "sup1": "¹", + "sup2": "²", + "sup3": "³", + "szlig": "ß", + "thorn": "þ", + "times": "×", + "trade": "™", + "uacute": "ú", + "ucirc": "û", + "ugrave": "ù", + "uml": "¨", + "uuml": "ü", + "yacute": "y", + "yen": "¥", + "yuml": "ÿ", + "infin": "∞", + "nbsp": stringWithValue(160) + ] From 88675adff2c5916c2c92f92c7f0dd24ebfb83f17 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 16 Sep 2024 21:57:05 -0700 Subject: [PATCH 60/88] Restore EntityDecodingTests. 
--- .../FeedParserTests/EntityDecodingTests.swift | 73 +++++++++---------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift index 7a2ea1cb4..bc55d3e91 100644 --- a/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift @@ -7,41 +7,40 @@ // import XCTest -import FeedParser +import SAX -//class EntityDecodingTests: XCTestCase { -// -// func test39Decoding() { -// -// // Bug found by Manton Reece — the ' entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog. -// -// let s = "These are the times that try men's souls." -// let decoded = s.rsparser_stringByDecodingHTMLEntities() -// -// XCTAssertEqual(decoded, "These are the times that try men's souls.") -// } -// -// func testEntities() { -// var s = "…" -// var decoded = s.rsparser_stringByDecodingHTMLEntities() -// -// XCTAssertEqual(decoded, "…") -// -// s = "…" -// decoded = s.rsparser_stringByDecodingHTMLEntities() -// XCTAssertEqual(decoded, "…") -// -// s = "'" -// decoded = s.rsparser_stringByDecodingHTMLEntities() -// XCTAssertEqual(decoded, "'") -// -// s = "§" -// decoded = s.rsparser_stringByDecodingHTMLEntities() -// XCTAssertEqual(decoded, "§") -// -// s = "£" -// decoded = s.rsparser_stringByDecodingHTMLEntities() -// XCTAssertEqual(decoded, "£") -// -// } -//} +class EntityDecodingTests: XCTestCase { + + func test39Decoding() { + + // Bug found by Manton Reece — the ' entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog. + + let s = "These are the times that try men's souls." 
+ let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "These are the times that try men's souls.") + } + + func testEntities() { + var s = "…" + var decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "…") + + s = "…" + decoded = HTMLEntityDecoder.decodedString(s) + XCTAssertEqual(decoded, "…") + + s = "'" + decoded = HTMLEntityDecoder.decodedString(s) + XCTAssertEqual(decoded, "'") + + s = "§" + decoded = HTMLEntityDecoder.decodedString(s) + XCTAssertEqual(decoded, "§") + + s = "£" + decoded = HTMLEntityDecoder.decodedString(s) + XCTAssertEqual(decoded, "£") + } +} From 32a7480e6c1f42464ffc5422c5bef675e5ae57af Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 16 Sep 2024 22:07:22 -0700 Subject: [PATCH 61/88] Fix bug in HTMLEntityDecoder. Make tests pass. --- Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift index 2c629565f..15fd31e29 100644 --- a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift +++ b/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift @@ -121,7 +121,7 @@ final class EntityScanner { scanLocation = initialScanLocation + 1 return nil } - scanLocation = initialScanLocation + 1 + scanLocation = scanLocation + 1 return decodedEntity } From a48615b06063c36687f99007b74cf02732f99fa7 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 16 Sep 2024 22:07:31 -0700 Subject: [PATCH 62/88] Add additional EntityDecodingTests. 
--- .../FeedParserTests/EntityDecodingTests.swift | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift index bc55d3e91..110e698d9 100644 --- a/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift @@ -21,7 +21,39 @@ class EntityDecodingTests: XCTestCase { XCTAssertEqual(decoded, "These are the times that try men's souls.") } - func testEntities() { + func testEntityAtBeginning() { + + let s = "'leading single quote" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "'leading single quote") + } + + func testEntityAtEnd() { + + let s = "trailing single quote'" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "trailing single quote'") + } + + func testEntityInMiddle() { + + let s = "entity ç in middle" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "entity ç in middle") + } + + func testMultipleEntitiesInARow() { + + let s = "çèmult……iple 'æ"entities÷♥" + let decoded = HTMLEntityDecoder.decodedString(s) + + XCTAssertEqual(decoded, "çèmult……iple 'æ\"entities÷♥") + } + + func testOnlyEntity() { var s = "…" var decoded = HTMLEntityDecoder.decodedString(s) From 887ef8c22e2cd97818871eedc2727b02c2a1546d Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 21 Sep 2024 11:11:45 -0700 Subject: [PATCH 63/88] Make JSONFeedParserTests run. 
--- .../Sources/FeedParser/Feeds/FeedParser.swift | 2 +- .../FeedParserTests/JSONFeedParserTests.swift | 226 +++++++++--------- 2 files changed, 114 insertions(+), 114 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index 2e60b7dad..153cf861b 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -33,7 +33,7 @@ public struct FeedParser { switch type { case .jsonFeed: - return nil // TODO: try JSONFeedParser.parse(parserData) + return try JSONFeedParser.parse(parserData) case .rssInJSON: return nil // TODO: try RSSInJSONParser.parse(parserData) diff --git a/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift b/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift index 688857de5..a315481af 100644 --- a/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift @@ -9,116 +9,116 @@ import XCTest import FeedParser -//class JSONFeedParserTests: XCTestCase { -// -// func testInessentialPerformance() { -// -// // 0.001 sec on my 2012 iMac. -// let d = parserData("inessential", "json", "http://inessential.com/") -// self.measure { -// let _ = try! FeedParser.parseSync(d) -// } -// } -// -// func testDaringFireballPerformance() { -// -// // 0.009 sec on my 2012 iMac. -// let d = parserData("DaringFireball", "json", "http://daringfireball.net/") -// self.measure { -// let _ = try! FeedParser.parseSync(d) -// } -// } -// -// func testGettingFaviconAndIconURLs() async { -// -// let d = parserData("DaringFireball", "json", "http://daringfireball.net/") -// let parsedFeed = try! await FeedParser.parse(d)! 
-// -// XCTAssert(parsedFeed.faviconURL == "https://daringfireball.net/graphics/favicon-64.png") -// XCTAssert(parsedFeed.iconURL == "https://daringfireball.net/graphics/apple-touch-icon.png") -// } -// -// func testAllThis() async { -// -// let d = parserData("allthis", "json", "http://leancrew.com/allthis/") -// let parsedFeed = try! await FeedParser.parse(d)! -// -// XCTAssertEqual(parsedFeed.items.count, 12) -// } -// -// func testCurt() async { -// -// let d = parserData("curt", "json", "http://curtclifton.net/") -// let parsedFeed = try! await FeedParser.parse(d)! -// -// XCTAssertEqual(parsedFeed.items.count, 26) -// -// var didFindTwitterQuitterArticle = false -// for article in parsedFeed.items { -// if article.title == "Twitter Quitter" { -// didFindTwitterQuitterArticle = true -// XCTAssertTrue(article.contentHTML!.hasPrefix("

I’ve decided to close my Twitter account. William Van Hecke makes a convincing case")) -// } -// } -// -// XCTAssertTrue(didFindTwitterQuitterArticle) -// } -// -// func testPixelEnvy() async { -// -// let d = parserData("pxlnv", "json", "http://pxlnv.com/") -// let parsedFeed = try! await FeedParser.parse(d)! -// XCTAssertEqual(parsedFeed.items.count, 20) -// -// } -// -// func testRose() async { -// let d = parserData("rose", "json", "http://www.rosemaryorchard.com/") -// let parsedFeed = try! await FeedParser.parse(d)! -// XCTAssertEqual(parsedFeed.items.count, 84) -// } -// -// func test3960() async { -// let d = parserData("3960", "json", "http://journal.3960.org/") -// let parsedFeed = try! await FeedParser.parse(d)! -// XCTAssertEqual(parsedFeed.items.count, 20) -// XCTAssertEqual(parsedFeed.language, "de-DE") -// -// for item in parsedFeed.items { -// XCTAssertEqual(item.language, "de-DE") -// } -// } -// -// func testAuthors() async { -// let d = parserData("authors", "json", "https://example.com/") -// let parsedFeed = try! await FeedParser.parse(d)! -// XCTAssertEqual(parsedFeed.items.count, 4) -// -// let rootAuthors = Set([ -// ParsedAuthor(name: "Root Author 1", url: nil, avatarURL: nil, emailAddress: nil), -// ParsedAuthor(name: "Root Author 2", url: nil, avatarURL: nil, emailAddress: nil) -// ]) -// let itemAuthors = Set([ -// ParsedAuthor(name: "Item Author 1", url: nil, avatarURL: nil, emailAddress: nil), -// ParsedAuthor(name: "Item Author 2", url: nil, avatarURL: nil, emailAddress: nil) -// ]) -// let legacyItemAuthors = Set([ -// ParsedAuthor(name: "Legacy Item Author", url: nil, avatarURL: nil, emailAddress: nil) -// ]) -// -// XCTAssertEqual(parsedFeed.authors?.count, 2) -// XCTAssertEqual(parsedFeed.authors, rootAuthors) -// -// let noAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item without authors" }! 
-// XCTAssertEqual(noAuthorsItem.authors, nil) -// -// let legacyAuthorItem = parsedFeed.items.first { $0.uniqueID == "Item with legacy author" }! -// XCTAssertEqual(legacyAuthorItem.authors, legacyItemAuthors) -// -// let modernAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with modern authors" }! -// XCTAssertEqual(modernAuthorsItem.authors, itemAuthors) -// -// let bothAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with both" }! -// XCTAssertEqual(bothAuthorsItem.authors, itemAuthors) -// } -//} +class JSONFeedParserTests: XCTestCase { + + func testInessentialPerformance() { + + // 0.001 sec on my 2012 iMac. + let d = parserData("inessential", "json", "http://inessential.com/") + self.measure { + let _ = try! FeedParser.parse(d) + } + } + + func testDaringFireballPerformance() { + + // 0.009 sec on my 2012 iMac. + let d = parserData("DaringFireball", "json", "http://daringfireball.net/") + self.measure { + let _ = try! FeedParser.parse(d) + } + } + + func testGettingFaviconAndIconURLs() async { + + let d = parserData("DaringFireball", "json", "http://daringfireball.net/") + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssert(parsedFeed.faviconURL == "https://daringfireball.net/graphics/favicon-64.png") + XCTAssert(parsedFeed.iconURL == "https://daringfireball.net/graphics/apple-touch-icon.png") + } + + func testAllThis() async { + + let d = parserData("allthis", "json", "http://leancrew.com/allthis/") + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertEqual(parsedFeed.items.count, 12) + } + + func testCurt() async { + + let d = parserData("curt", "json", "http://curtclifton.net/") + let parsedFeed = try! FeedParser.parse(d)! + + XCTAssertEqual(parsedFeed.items.count, 26) + + var didFindTwitterQuitterArticle = false + for article in parsedFeed.items { + if article.title == "Twitter Quitter" { + didFindTwitterQuitterArticle = true + XCTAssertTrue(article.contentHTML!.hasPrefix("

I’ve decided to close my Twitter account. William Van Hecke makes a convincing case")) + } + } + + XCTAssertTrue(didFindTwitterQuitterArticle) + } + + func testPixelEnvy() async { + + let d = parserData("pxlnv", "json", "http://pxlnv.com/") + let parsedFeed = try! FeedParser.parse(d)! + XCTAssertEqual(parsedFeed.items.count, 20) + + } + + func testRose() async { + let d = parserData("rose", "json", "http://www.rosemaryorchard.com/") + let parsedFeed = try! FeedParser.parse(d)! + XCTAssertEqual(parsedFeed.items.count, 84) + } + + func test3960() async { + let d = parserData("3960", "json", "http://journal.3960.org/") + let parsedFeed = try! FeedParser.parse(d)! + XCTAssertEqual(parsedFeed.items.count, 20) + XCTAssertEqual(parsedFeed.language, "de-DE") + + for item in parsedFeed.items { + XCTAssertEqual(item.language, "de-DE") + } + } + + func testAuthors() async { + let d = parserData("authors", "json", "https://example.com/") + let parsedFeed = try! FeedParser.parse(d)! + XCTAssertEqual(parsedFeed.items.count, 4) + + let rootAuthors = Set([ + ParsedAuthor(name: "Root Author 1", url: nil, avatarURL: nil, emailAddress: nil), + ParsedAuthor(name: "Root Author 2", url: nil, avatarURL: nil, emailAddress: nil) + ]) + let itemAuthors = Set([ + ParsedAuthor(name: "Item Author 1", url: nil, avatarURL: nil, emailAddress: nil), + ParsedAuthor(name: "Item Author 2", url: nil, avatarURL: nil, emailAddress: nil) + ]) + let legacyItemAuthors = Set([ + ParsedAuthor(name: "Legacy Item Author", url: nil, avatarURL: nil, emailAddress: nil) + ]) + + XCTAssertEqual(parsedFeed.authors?.count, 2) + XCTAssertEqual(parsedFeed.authors, rootAuthors) + + let noAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item without authors" }! + XCTAssertEqual(noAuthorsItem.authors, nil) + + let legacyAuthorItem = parsedFeed.items.first { $0.uniqueID == "Item with legacy author" }! 
+ XCTAssertEqual(legacyAuthorItem.authors, legacyItemAuthors) + + let modernAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with modern authors" }! + XCTAssertEqual(modernAuthorsItem.authors, itemAuthors) + + let bothAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with both" }! + XCTAssertEqual(bothAuthorsItem.authors, itemAuthors) + } +} From ecfe504d9d7cb22cf00700da75a8345ffc8ed8c4 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 21 Sep 2024 11:20:23 -0700 Subject: [PATCH 64/88] Make RSS-in-JSON tests work. --- .../Sources/FeedParser/Feeds/FeedParser.swift | 2 +- .../Feeds/JSON/RSSInJSONParser.swift | 359 +++++++++--------- .../RSSInJSONParserTests.swift | 34 +- 3 files changed, 198 insertions(+), 197 deletions(-) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index 153cf861b..6e467f7b4 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -36,7 +36,7 @@ public struct FeedParser { return try JSONFeedParser.parse(parserData) case .rssInJSON: - return nil // TODO: try RSSInJSONParser.parse(parserData) + return try RSSInJSONParser.parse(parserData) case .rss: let feed = RSSParser.parsedFeed(with: parserData) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift index 4bf2ad624..2dcd92313 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift @@ -1,182 +1,183 @@ -//// -//// RSSInJSONParser.swift -//// RSParser -//// -//// Created by Brent Simmons on 6/24/17. -//// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-//// // -//import Foundation -//import SAX +// RSSInJSONParser.swift +// RSParser // -//// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md -//// Also: http://cyber.harvard.edu/rss/rss.html +// Created by Brent Simmons on 6/24/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. // -//public struct RSSInJSONParser { -// -// public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { -// -// do { -// guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else { -// throw FeedParserError(.invalidJSON) -// } -// guard let rssObject = parsedObject["rss"] as? JSONDictionary else { -// throw FeedParserError(.rssChannelNotFound) -// } -// guard let channelObject = rssObject["channel"] as? JSONDictionary else { -// throw FeedParserError(.rssChannelNotFound) -// } -// -// // I’d bet money that in practice the items array won’t always appear correctly inside the channel object. -// // I’d also bet that sometimes it gets called "items" instead of "item". -// var itemsObject = channelObject["item"] as? JSONArray -// if itemsObject == nil { -// itemsObject = parsedObject["item"] as? JSONArray -// } -// if itemsObject == nil { -// itemsObject = channelObject["items"] as? JSONArray -// } -// if itemsObject == nil { -// itemsObject = parsedObject["items"] as? JSONArray -// } -// if itemsObject == nil { -// throw FeedParserError(.rssItemsNotFound) -// } -// -// let title = channelObject["title"] as? String -// let homePageURL = channelObject["link"] as? String -// let feedURL = parserData.url -// let feedDescription = channelObject["description"] as? String -// let feedLanguage = channelObject["language"] as? 
String -// -// let items = parseItems(itemsObject!, parserData.url) -// -// return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) -// -// } -// catch { throw error } -// } -//} -// -//private extension RSSInJSONParser { -// -// static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set { -// -// return Set(itemsObject.compactMap{ (oneItemDictionary) -> ParsedItem? in -// -// return parsedItemWithDictionary(oneItemDictionary, feedURL) -// }) -// } -// -// static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { -// -// let externalURL = itemDictionary["link"] as? String -// let title = itemDictionary["title"] as? String -// -// var contentHTML = itemDictionary["description"] as? String -// var contentText: String? = nil -// if contentHTML != nil && !(contentHTML!.contains("<")) { -// contentText = contentHTML -// contentHTML = nil -// } -// if contentHTML == nil && contentText == nil && title == nil { -// return nil -// } -// -// var datePublished: Date? = nil -// if let datePublishedString = itemDictionary["pubDate"] as? String { -// datePublished = RSDateWithString(datePublishedString) -// } -// -// let authors = parseAuthors(itemDictionary) -// let tags = parseTags(itemDictionary) -// let attachments = parseAttachments(itemDictionary) -// -// var uniqueID: String? = itemDictionary["guid"] as? String -// if uniqueID == nil { -// -// // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result. -// // Items should have guids. When they don't, re-runs are very likely -// // because there's no other 100% reliable way to determine identity. -// // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.) 
-// -// var s = "" -// if let datePublished = datePublished { -// s += "\(datePublished.timeIntervalSince1970)" -// } -// if let title = title { -// s += title -// } -// if let externalURL = externalURL { -// s += externalURL -// } -// if let authorEmailAddress = authors?.first?.emailAddress { -// s += authorEmailAddress -// } -// if let oneAttachmentURL = attachments?.first?.url { -// s += oneAttachmentURL -// } -// if s.isEmpty { -// // Sheesh. Tough case. -// if let _ = contentHTML { -// s = contentHTML! -// } -// if let _ = contentText { -// s = contentText! -// } -// } -// uniqueID = (s as NSString).rsparser_md5Hash() -// } -// -// if let uniqueID = uniqueID { -// return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, language: nil, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments) -// } -// return nil -// } -// -// static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set? { -// -// guard let authorEmailAddress = itemDictionary["author"] as? String else { -// return nil -// } -// let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress) -// return Set([parsedAuthor]) -// } -// -// static func parseTags(_ itemDictionary: JSONDictionary) -> Set? { -// -// if let categoryObject = itemDictionary["category"] as? JSONDictionary { -// if let oneTag = categoryObject["#value"] as? String { -// return Set([oneTag]) -// } -// return nil -// } -// else if let categoryArray = itemDictionary["category"] as? JSONArray { -// return Set(categoryArray.compactMap{ $0["#value"] as? String }) -// } -// return nil -// } -// -// static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { -// -// guard let enclosureObject = itemDictionary["enclosure"] as? 
JSONDictionary else { -// return nil -// } -// guard let attachmentURL = enclosureObject["url"] as? String else { -// return nil -// } -// -// var attachmentSize = enclosureObject["length"] as? Int -// if attachmentSize == nil { -// if let attachmentSizeString = enclosureObject["length"] as? String { -// attachmentSize = (attachmentSizeString as NSString).integerValue -// } -// } -// -// let type = enclosureObject["type"] as? String -// if let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) { -// return Set([attachment]) -// } -// return nil -// } -//} + +import Foundation +import SAX +import DateParser + +// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md +// Also: http://cyber.harvard.edu/rss/rss.html + +public struct RSSInJSONParser { + + public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + + do { + guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else { + throw FeedParserError(.invalidJSON) + } + guard let rssObject = parsedObject["rss"] as? JSONDictionary else { + throw FeedParserError(.rssChannelNotFound) + } + guard let channelObject = rssObject["channel"] as? JSONDictionary else { + throw FeedParserError(.rssChannelNotFound) + } + + // I’d bet money that in practice the items array won’t always appear correctly inside the channel object. + // I’d also bet that sometimes it gets called "items" instead of "item". + var itemsObject = channelObject["item"] as? JSONArray + if itemsObject == nil { + itemsObject = parsedObject["item"] as? JSONArray + } + if itemsObject == nil { + itemsObject = channelObject["items"] as? JSONArray + } + if itemsObject == nil { + itemsObject = parsedObject["items"] as? JSONArray + } + if itemsObject == nil { + throw FeedParserError(.rssItemsNotFound) + } + + let title = channelObject["title"] as? 
String + let homePageURL = channelObject["link"] as? String + let feedURL = parserData.url + let feedDescription = channelObject["description"] as? String + let feedLanguage = channelObject["language"] as? String + + let items = parseItems(itemsObject!, parserData.url) + + return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) + + } + catch { throw error } + } +} + +private extension RSSInJSONParser { + + static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set { + + return Set(itemsObject.compactMap{ (oneItemDictionary) -> ParsedItem? in + + return parsedItemWithDictionary(oneItemDictionary, feedURL) + }) + } + + static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { + + let externalURL = itemDictionary["link"] as? String + let title = itemDictionary["title"] as? String + + var contentHTML = itemDictionary["description"] as? String + var contentText: String? = nil + if contentHTML != nil && !(contentHTML!.contains("<")) { + contentText = contentHTML + contentHTML = nil + } + if contentHTML == nil && contentText == nil && title == nil { + return nil + } + + var datePublished: Date? = nil + if let datePublishedString = itemDictionary["pubDate"] as? String { + datePublished = DateParser.date(string: datePublishedString) + } + + let authors = parseAuthors(itemDictionary) + let tags = parseTags(itemDictionary) + let attachments = parseAttachments(itemDictionary) + + var uniqueID: String? = itemDictionary["guid"] as? String + if uniqueID == nil { + + // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result. + // Items should have guids. When they don't, re-runs are very likely + // because there's no other 100% reliable way to determine identity. 
+ // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.) + + var s = "" + if let datePublished = datePublished { + s += "\(datePublished.timeIntervalSince1970)" + } + if let title = title { + s += title + } + if let externalURL = externalURL { + s += externalURL + } + if let authorEmailAddress = authors?.first?.emailAddress { + s += authorEmailAddress + } + if let oneAttachmentURL = attachments?.first?.url { + s += oneAttachmentURL + } + if s.isEmpty { + // Sheesh. Tough case. + if let _ = contentHTML { + s = contentHTML! + } + if let _ = contentText { + s = contentText! + } + } + uniqueID = s.md5String + } + + if let uniqueID = uniqueID { + return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, language: nil, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments) + } + return nil + } + + static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set? { + + guard let authorEmailAddress = itemDictionary["author"] as? String else { + return nil + } + let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress) + return Set([parsedAuthor]) + } + + static func parseTags(_ itemDictionary: JSONDictionary) -> Set? { + + if let categoryObject = itemDictionary["category"] as? JSONDictionary { + if let oneTag = categoryObject["#value"] as? String { + return Set([oneTag]) + } + return nil + } + else if let categoryArray = itemDictionary["category"] as? JSONArray { + return Set(categoryArray.compactMap{ $0["#value"] as? String }) + } + return nil + } + + static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { + + guard let enclosureObject = itemDictionary["enclosure"] as? 
JSONDictionary else { + return nil + } + guard let attachmentURL = enclosureObject["url"] as? String else { + return nil + } + + var attachmentSize = enclosureObject["length"] as? Int + if attachmentSize == nil { + if let attachmentSizeString = enclosureObject["length"] as? String { + attachmentSize = (attachmentSizeString as NSString).integerValue + } + } + + let type = enclosureObject["type"] as? String + if let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) { + return Set([attachment]) + } + return nil + } +} diff --git a/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift index df6bef373..4412dd062 100644 --- a/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift +++ b/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift @@ -9,20 +9,20 @@ import XCTest import FeedParser -//class RSSInJSONParserTests: XCTestCase { -// -// func testScriptingNewsPerformance() { -// -// // 0.003 sec on my 2012 iMac. -// let d = parserData("ScriptingNews", "json", "http://scripting.com/") -// self.measure { -// let _ = try! FeedParser.parseSync(d) -// } -// } -// -// func testFeedLanguage() { -// let d = parserData("ScriptingNews", "json", "http://scripting.com/") -// let parsedFeed = try! FeedParser.parseSync(d)! -// XCTAssertEqual(parsedFeed.language, "en-us") -// } -//} +class RSSInJSONParserTests: XCTestCase { + + func testScriptingNewsPerformance() { + + // 0.003 sec on my 2012 iMac. + let d = parserData("ScriptingNews", "json", "http://scripting.com/") + self.measure { + let _ = try! FeedParser.parse(d) + } + } + + func testFeedLanguage() { + let d = parserData("ScriptingNews", "json", "http://scripting.com/") + let parsedFeed = try! FeedParser.parse(d)! 
+ XCTAssertEqual(parsedFeed.language, "en-us") + } +} From ce39624a39c93ade383e294f1458e56727df50f4 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 21 Sep 2024 11:47:07 -0700 Subject: [PATCH 65/88] Create first draft of SAXHTMLParser. --- .../Parser/Sources/SAX/SAXHTMLParser.swift | 255 ++++++++++++++---- 1 file changed, 204 insertions(+), 51 deletions(-) diff --git a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift index 43ba1cc38..2f860e297 100644 --- a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift +++ b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift @@ -1,54 +1,207 @@ -//// -//// SAXHTMLParser.swift -//// -//// -//// Created by Brent Simmons on 8/26/24. -//// // -//import Foundation -//import libxml2 +// SAXHTMLParser.swift +// // -//protocol SAXHTMLParserDelegate: AnyObject { +// Created by Brent Simmons on 8/26/24. // -// func saxParser(_: SAXHTMLParser, XMLStartElement localName: XMLPointer, attributes: UnsafePointer?) -// -// func saxParser(_: SAXHTMLParser, XMLEndElement localName: XMLPointer?) -// -// // Length is guaranteed to be greater than 0. 
-// func saxParser(_: SAXHTMLParser, XMLCharactersFound characters: XMLPointer?, length: Int) -//} -// -//final class SAXHTMLParser { -// -// fileprivate let delegate: SAXHTMLParserDelegate -// private var data: Data -// -// init(delegate: SAXHTMLParserDelegate, data: Data) { -// -// self.delegate = delegate -// self.data = data -// } -// -// func parse() { -// -// guard !data.isEmpty else { -// return -// } -// -// data.withUnsafeBytes { bufferPointer in -// -// guard let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress else { -// return -// } -// -// let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count)) -// let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding) -// htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT)) -// -// htmlParseChunk(context, bytes, Int32(data.count), 0) -// -// htmlParseChunk(context, nil, 0, 1) -// htmlFreeParserCtxt(context) -// } -// } -//} + +import Foundation +import libxml2 + +protocol SAXHTMLParserDelegate: AnyObject { + + func saxHTMLParser(_: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) + + func saxHTMLParser(_: SAXHTMLParser, endElement name: XMLPointer?) + + // Length is guaranteed to be greater than 0. + func saxHTMLParser(_: SAXHTMLParser, charactersFound characters: XMLPointer?, count: Int) +} + +final class SAXHTMLParser { + + fileprivate let delegate: SAXHTMLParserDelegate + + public var currentCharacters: Data? { // UTF-8 encoded + + guard storingCharacters else { + return nil + } + return characters + } + + // Conveniences to get string version of currentCharacters + + public var currentString: String? { + + guard let d = currentCharacters, !d.isEmpty else { + return nil + } + return String(data: d, encoding: .utf8) + } + + public var currentStringWithTrimmedWhitespace: String? 
{ + + guard let s = currentString else { + return nil + } + return s.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines) + } + + private var data: Data + private var storingCharacters = false + private var characters = Data() + + public init(delegate: SAXHTMLParserDelegate, data: Data) { + + self.delegate = delegate + self.data = data + } + + public func parse() { + + guard !data.isEmpty else { + return + } + + data.withUnsafeBytes { bufferPointer in + + guard let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress else { + return + } + + let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count)) + let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding) + htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT)) + + htmlParseChunk(context, bytes, Int32(data.count), 0) + + htmlParseChunk(context, nil, 0, 1) + htmlFreeParserCtxt(context) + } + } + + /// Delegate can call from xmlStartElement. Characters will be available in xmlEndElement as currentCharacters property. Storing characters is stopped after each xmlEndElement. + public func beginStoringCharacters() { + + storingCharacters = true + characters.count = 0 + } + + public func endStoringCharacters() { + + storingCharacters = false + characters.count = 0 + } + + public typealias HTMLAttributesDictionary = [String: String] + + public func attributesDictionary(_ attributes: UnsafePointer?) -> HTMLAttributesDictionary? { + + guard let attributes else { + return nil + } + + var dictionary = [String: String]() + var ix = 0 + var currentKey: String? = nil + + while true { + let oneAttribute = attributes[ix] + ix += 1 + + if currentKey == nil && oneAttribute == nil { + break + } + + if currentKey == nil { + if let oneAttribute { + currentKey = String(cString: oneAttribute) + } + } else { + let value: String? 
+ if let oneAttribute { + value = String(cString: oneAttribute) + } else { + value = nil + } + + dictionary[currentKey!] = value ?? "" + currentKey = nil + } + } + + return dictionary + } +} + +private extension SAXHTMLParser { + + func charactersFound(_ xmlCharacters: XMLPointer, count: Int) { + + if storingCharacters { + characters.append(xmlCharacters, count: count) + } + + delegate.saxHTMLParser(self, charactersFound: characters, count: count) + } + + func startElement(_ name: XMLPointer, attributes: UnsafePointer?) { + + delegate.saxHTMLParser(self, startElement: name, attributes: attributes) + } + + func endElement(_ name: XMLPointer) { + + delegate.saxHTMLParser(self, endElement: name) + endStoringCharacters() + } +} + +private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, attributes: UnsafeMutablePointer?) { + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.startElement(name, attributes: attributes) +} + + +private func endElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?) 
{ + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.endElement(name) +} + +private func charactersFound(_ context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) { + + guard let context, let ch, len > 0 else { + return + } + + let parser = parser(from: context) + parser.charactersFound(ch, count: Int(len)) +} + +private func parser(from context: UnsafeMutableRawPointer) -> SAXParser { + + Unmanaged.fromOpaque(context).takeUnretainedValue() +} + +nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = { + + var handler = xmlSAXHandler() + + handler.characters = charactersFound + handler.startElement = startElement + handler.endElement = endElement + handler.initialized = XML_SAX2_MAGIC + + return handler +}() From 6959e1f8912fb7d8f72da06f4cefad8124fc0c49 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 21 Sep 2024 12:01:17 -0700 Subject: [PATCH 66/88] Fix build errors. --- Modules/Parser/Sources/SAX/SAXHTMLParser.swift | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift index 2f860e297..5e884d58a 100644 --- a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift +++ b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift @@ -10,12 +10,12 @@ import libxml2 protocol SAXHTMLParserDelegate: AnyObject { - func saxHTMLParser(_: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) + func saxHTMLParser(_: SAXHTMLParser, startElement: XMLPointer, attributes: UnsafePointer?) - func saxHTMLParser(_: SAXHTMLParser, endElement name: XMLPointer?) + func saxHTMLParser(_: SAXHTMLParser, endElement: XMLPointer) // Length is guaranteed to be greater than 0. 
- func saxHTMLParser(_: SAXHTMLParser, charactersFound characters: XMLPointer?, count: Int) + func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) } final class SAXHTMLParser { @@ -72,7 +72,7 @@ final class SAXHTMLParser { let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count)) let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding) - htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT)) + htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NONET.rawValue | HTML_PARSE_COMPACT.rawValue)) htmlParseChunk(context, bytes, Int32(data.count), 0) @@ -137,13 +137,13 @@ final class SAXHTMLParser { private extension SAXHTMLParser { - func charactersFound(_ xmlCharacters: XMLPointer, count: Int) { + func charactersFound(_ htmlCharacters: XMLPointer, count: Int) { if storingCharacters { - characters.append(xmlCharacters, count: count) + characters.append(htmlCharacters, count: count) } - delegate.saxHTMLParser(self, charactersFound: characters, count: count) + delegate.saxHTMLParser(self, charactersFound: htmlCharacters, count: count) } func startElement(_ name: XMLPointer, attributes: UnsafePointer?) { @@ -189,7 +189,7 @@ private func charactersFound(_ context: UnsafeMutableRawPointer?, ch: XMLPointer parser.charactersFound(ch, count: Int(len)) } -private func parser(from context: UnsafeMutableRawPointer) -> SAXParser { +private func parser(from context: UnsafeMutableRawPointer) -> SAXHTMLParser { Unmanaged.fromOpaque(context).takeUnretainedValue() } From 3c4a278b42e40e0dc3c54bb28c44072efd9d83d9 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 21 Sep 2024 12:16:09 -0700 Subject: [PATCH 67/88] Create HTMLParser module. 
--- .../xcschemes/Parser-Package.xcscheme | 14 ++++++++++++++ Modules/Parser/Package.swift | 12 ++++++++++++ Modules/Parser/Sources/HTMLParser/HTMLLink.swift | 15 +++++++++++++++ .../Sources/HTMLParser/HTMLLinkParser.swift | 16 ++++++++++++++++ 4 files changed, 57 insertions(+) create mode 100644 Modules/Parser/Sources/HTMLParser/HTMLLink.swift create mode 100644 Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme index 5ddd095ac..416ab7fe7 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme @@ -63,6 +63,20 @@ ReferencedContainer = "container:"> + + + + [HTMLLink] { + + } +} From 20b222f4555122a9872fb09a8a6b59104b9a8ec2 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 21 Sep 2024 21:49:57 -0700 Subject: [PATCH 68/88] Create first draft of HTLMLinkParser. --- .../Sources/HTMLParser/HTMLLinkParser.swift | 97 +++++++++++++++++++ .../Parser/Sources/SAX/SAXHTMLParser.swift | 4 +- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift b/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift index f34cf8b7a..00ea74e97 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift +++ b/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift @@ -10,7 +10,104 @@ import SAX public final class HTMLLinkParser { + public private(set) var links = [HTMLLink]() + + private let parserData: ParserData + private let baseURL: URL? 
+ public static func htmlLinks(parserData: ParserData) -> [HTMLLink] { + let parser = HTMLLinkParser(parserData) + parser.parse() + return parser.links + } + + init(_ parserData: ParserData) { + + self.parserData = parserData + self.baseURL = URL(string: parserData.url) + } +} + +private extension HTMLLinkParser { + + func parse() { + + let htmlParser = SAXHTMLParser(delegate: self, data: parserData.data) + htmlParser.parse() + } +} + +private extension HTMLLinkParser: SAXHTMLParserDelegate { + + var currentLink: HTMLLink? { + links.last + } + + struct HTMLAttributeName { + let href = "href" + let title = "title" + } + + func title(_ attributesDictionary: HTMLAttributesDictionary) -> String? { + + attributesDictionary.object(object(forCaseInsensitiveKey: HTMLAttributeName.title)) + } + + func urlString(_ attributesDictionary: HTMLAttributesDictionary) -> String? { + + guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href) else { + return nil + } + + guard let baseURL, let absoluteURL = URL(string: href, relativeTo: baseURL) else { + assertionFailure("Expected to create URL") + return nil + } + + return absoluteURL.absoluteString + } + + func handleLinkAttributes(_ attributesDictionary: HTMLAttributesDictionary) { + + guard let currentLink else { + assertionFailure("currentLink must not be nil") + return + } + + link.urlString = urlString(attributesDictionary) + link.title = title(attributesDictionary) + } + + struct HTMLName { + static let a = "a".utf8CString + } + + func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) 
{ + + guard SAXEqualTags(name, HTMLName.a) else { + return + } + + let link = HTMLLink() + links.append(link) + + if let attributesDictionary = saxHTMLParser.attributesDictionary(attributes) { + handleLinkAttributes(attributesDictionary) + } + + saxHTMLParser.beginStoringCharacters() + } + + func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) { + + guard SAXEqualTags(name, HTMLName.a) else { + return + } + currentLink.text = saxHTMLParser.currentStringWithTrimmedWhitespace + } + + func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) { + // Nothing needed. } } diff --git a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift index 5e884d58a..6c845d5f1 100644 --- a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift +++ b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift @@ -8,7 +8,7 @@ import Foundation import libxml2 -protocol SAXHTMLParserDelegate: AnyObject { +public protocol SAXHTMLParserDelegate: AnyObject { func saxHTMLParser(_: SAXHTMLParser, startElement: XMLPointer, attributes: UnsafePointer?) @@ -18,7 +18,7 @@ protocol SAXHTMLParserDelegate: AnyObject { func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) } -final class SAXHTMLParser { +public final class SAXHTMLParser { fileprivate let delegate: SAXHTMLParserDelegate From a7c4669bd69bd64d18db286b539064d17ca31605 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sat, 21 Sep 2024 22:10:47 -0700 Subject: [PATCH 69/88] Create HTMLParserTests. 
--- .../xcschemes/HTMLParserTests.xcscheme | 54 +++++++++++++++++++ .../xcschemes/Parser-Package.xcscheme | 10 ++++ Modules/Parser/Package.swift | 4 ++ .../Parser/Sources/HTMLParser/HTMLLink.swift | 13 +++-- .../Sources/HTMLParser/HTMLLinkParser.swift | 37 +++++++------ .../Tests/FeedParserTests/HTMLLinkTests.swift | 42 --------------- .../Tests/HTMLParserTests/HTMLLinkTests.swift | 50 +++++++++++++++++ .../HTMLMetadataTests.swift | 0 .../Resources/sixcolors.html | 0 9 files changed, 149 insertions(+), 61 deletions(-) create mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme delete mode 100644 Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift create mode 100644 Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift rename Modules/Parser/Tests/{FeedParserTests => HTMLParserTests}/HTMLMetadataTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => HTMLParserTests}/Resources/sixcolors.html (100%) diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme new file mode 100644 index 000000000..64ea056f7 --- /dev/null +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme index 416ab7fe7..f37cf2377 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/Parser-Package.xcscheme @@ -126,6 +126,16 @@ ReferencedContainer = "container:"> + + + + [HTMLLink] { + public static func htmlLinks(with parserData: ParserData) -> [HTMLLink] { let parser = HTMLLinkParser(parserData) parser.parse() @@ -38,23 +38,23 @@ private extension HTMLLinkParser { } } -private extension 
HTMLLinkParser: SAXHTMLParserDelegate { +extension HTMLLinkParser: SAXHTMLParserDelegate { - var currentLink: HTMLLink? { + private var currentLink: HTMLLink? { links.last } - struct HTMLAttributeName { - let href = "href" - let title = "title" + private struct HTMLAttributeName { + static let href = "href" + static let title = "title" } - func title(_ attributesDictionary: HTMLAttributesDictionary) -> String? { + private func title(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? { - attributesDictionary.object(object(forCaseInsensitiveKey: HTMLAttributeName.title)) + attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.title) } - func urlString(_ attributesDictionary: HTMLAttributesDictionary) -> String? { + private func urlString(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? { guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href) else { return nil @@ -68,22 +68,22 @@ private extension HTMLLinkParser: SAXHTMLParserDelegate { return absoluteURL.absoluteString } - func handleLinkAttributes(_ attributesDictionary: HTMLAttributesDictionary) { + private func handleLinkAttributes(_ attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) { guard let currentLink else { assertionFailure("currentLink must not be nil") return } - link.urlString = urlString(attributesDictionary) - link.title = title(attributesDictionary) + currentLink.urlString = urlString(with: attributesDictionary) + currentLink.title = title(with: attributesDictionary) } - struct HTMLName { + private struct HTMLName { static let a = "a".utf8CString } - func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) { + public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) 
{ guard SAXEqualTags(name, HTMLName.a) else { return @@ -99,15 +99,20 @@ private extension HTMLLinkParser: SAXHTMLParserDelegate { saxHTMLParser.beginStoringCharacters() } - func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) { + public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, endElement name: XMLPointer) { guard SAXEqualTags(name, HTMLName.a) else { return } + guard let currentLink else { + assertionFailure("currentLink must not be nil.") + return + } + currentLink.text = saxHTMLParser.currentStringWithTrimmedWhitespace } - func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) { + public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) { // Nothing needed. } } diff --git a/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift deleted file mode 100644 index cd39f7847..000000000 --- a/Modules/Parser/Tests/FeedParserTests/HTMLLinkTests.swift +++ /dev/null @@ -1,42 +0,0 @@ -// -// HTMLLinkTests.swift -// RSParser -// -// Created by Brent Simmons on 6/25/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
-// - -import XCTest -import FeedParser - -//class HTMLLinkTests: XCTestCase { -// -// func testSixColorsPerformance() { -// -// // 0.003 sec on my 2012 iMac -// let d = parserData("sixcolors", "html", "http://sixcolors.com/") -// self.measure { -// let _ = RSHTMLLinkParser.htmlLinks(with: d) -// } -// } -// -// func testSixColorsLink() { -// -// let d = parserData("sixcolors", "html", "http://sixcolors.com/") -// let links = RSHTMLLinkParser.htmlLinks(with: d) -// -// let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php" -// let textToFind = "this week’s episode of The Incomparable" -// -// var found = false -// for oneLink in links { -// if let urlString = oneLink.urlString, let text = oneLink.text, urlString == linkToFind, text == textToFind { -// found = true -// } -// } -// -// XCTAssertTrue(found) -// XCTAssertEqual(links.count, 131) -// } -// -//} diff --git a/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift new file mode 100644 index 000000000..be1e03665 --- /dev/null +++ b/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift @@ -0,0 +1,50 @@ +// +// HTMLLinkTests.swift +// RSParser +// +// Created by Brent Simmons on 6/25/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
+// + +import XCTest +import FeedParser +import HTMLParser +import SAX + +class HTMLLinkTests: XCTestCase { + + func testSixColorsPerformance() { + + // 0.003 sec on my 2012 iMac + let d = parserData("sixcolors", "html", "http://sixcolors.com/") + self.measure { + let _ = HTMLLinkParser.htmlLinks(with: d) + } + } + + func testSixColorsLink() { + + let d = parserData("sixcolors", "html", "http://sixcolors.com/") + let links = HTMLLinkParser.htmlLinks(with: d) + + let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php" + let textToFind = "this week’s episode of The Incomparable" + + var found = false + for oneLink in links { + if let urlString = oneLink.urlString, let text = oneLink.text, urlString == linkToFind, text == textToFind { + found = true + } + } + + XCTAssertTrue(found) + XCTAssertEqual(links.count, 131) + } +} + +func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { + let filename = "Resources/\(filename)" + let path = Bundle.module.path(forResource: filename, ofType: fileExtension)! + let data = try! Data(contentsOf: URL(fileURLWithPath: path)) + return ParserData(url: url, data: data) +} diff --git a/Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/HTMLMetadataTests.swift rename to Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/sixcolors.html b/Modules/Parser/Tests/HTMLParserTests/Resources/sixcolors.html similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/sixcolors.html rename to Modules/Parser/Tests/HTMLParserTests/Resources/sixcolors.html From c348ec41b6e28009ca9ac67e6ae00ba661c34bad Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 22 Sep 2024 11:33:37 -0700 Subject: [PATCH 70/88] Make HTMLLinkTests pass. 
--- .../Sources/HTMLParser/HTMLLinkParser.swift | 2 +- .../Parser/Sources/SAX/SAXHTMLParser.swift | 70 +++++++++---------- .../Tests/HTMLParserTests/HTMLLinkTests.swift | 2 +- .../HTMLParserTests/HTMLMetadataTests.swift | 1 - 4 files changed, 34 insertions(+), 41 deletions(-) diff --git a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift b/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift index 6ea953618..5a04612de 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift +++ b/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift @@ -56,7 +56,7 @@ extension HTMLLinkParser: SAXHTMLParserDelegate { private func urlString(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? { - guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href) else { + guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href), !href.isEmpty else { return nil } diff --git a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift index 6c845d5f1..d37701b18 100644 --- a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift +++ b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift @@ -66,13 +66,13 @@ public final class SAXHTMLParser { data.withUnsafeBytes { bufferPointer in - guard let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress else { + guard let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress else { return } let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count)) let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding) - htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NONET.rawValue | HTML_PARSE_COMPACT.rawValue)) + htmlCtxtUseOptions(context, Int32(HTML_PARSE_RECOVER.rawValue | HTML_PARSE_NONET.rawValue | HTML_PARSE_COMPACT.rawValue | HTML_PARSE_NOERROR.rawValue | HTML_PARSE_NOWARNING.rawValue)) 
htmlParseChunk(context, bytes, Int32(data.count), 0) @@ -158,37 +158,6 @@ private extension SAXHTMLParser { } } -private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, attributes: UnsafeMutablePointer?) { - - guard let context, let name else { - return - } - - let parser = parser(from: context) - parser.startElement(name, attributes: attributes) -} - - -private func endElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?) { - - guard let context, let name else { - return - } - - let parser = parser(from: context) - parser.endElement(name) -} - -private func charactersFound(_ context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) { - - guard let context, let ch, len > 0 else { - return - } - - let parser = parser(from: context) - parser.charactersFound(ch, count: Int(len)) -} - private func parser(from context: UnsafeMutableRawPointer) -> SAXHTMLParser { Unmanaged.fromOpaque(context).takeUnretainedValue() @@ -196,12 +165,37 @@ private func parser(from context: UnsafeMutableRawPointer) -> SAXHTMLParser { nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = { - var handler = xmlSAXHandler() + var handler = htmlSAXHandler() - handler.characters = charactersFound - handler.startElement = startElement - handler.endElement = endElement - handler.initialized = XML_SAX2_MAGIC + handler.characters = { (context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) in + + guard let context, let ch, len > 0 else { + return + } + + let parser = parser(from: context) + parser.charactersFound(ch, count: Int(len)) + } + + handler.startElement = { (context: UnsafeMutableRawPointer?, name: XMLPointer?, attributes: UnsafeMutablePointer?) in + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.startElement(name, attributes: attributes) + } + + handler.endElement = { (context: UnsafeMutableRawPointer?, name: XMLPointer?) 
in + + guard let context, let name else { + return + } + + let parser = parser(from: context) + parser.endElement(name) + } return handler }() diff --git a/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift index be1e03665..97d52e4cb 100644 --- a/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift +++ b/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift @@ -7,9 +7,9 @@ // import XCTest -import FeedParser import HTMLParser import SAX +import libxml2 class HTMLLinkTests: XCTestCase { diff --git a/Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift index bf30d68c9..12343f615 100644 --- a/Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift +++ b/Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift @@ -7,7 +7,6 @@ // import XCTest -import FeedParser //class HTMLMetadataTests: XCTestCase { // From a2fc8b5dec2d5375569bbd7212abf101af5bf603 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 22 Sep 2024 11:41:08 -0700 Subject: [PATCH 71/88] Consolidate tests to a single ParserTests target. 
--- .../xcschemes/FeedParserTests.xcscheme | 54 - .../xcschemes/HTMLParserTests.xcscheme | 54 - .../xcschemes/OPMLParserTests.xcscheme | 54 - ...serTests.xcscheme => ParserTests.xcscheme} | 6 +- Modules/Parser/Package.swift | 20 +- .../Resources/DaringFireball.rss | 2278 ----------------- .../AtomParserTests.swift | 0 .../DateParserTests.swift | 0 .../EntityDecodingTests.swift | 0 .../FeedParserTypeTests.swift | 7 - .../HTMLLinkTests.swift | 7 - .../HTMLMetadataTests.swift | 0 .../Info.plist | 0 .../JSONFeedParserTests.swift | 0 .../OPMLTests.swift | 0 .../ParserTests.swift | 0 .../RSSInJSONParserTests.swift | 0 .../RSSParserTests.swift | 0 .../Resources/3960.json | 0 .../Resources/489.rss | 0 .../Resources/4fsodonline.atom | 0 .../Resources/DaringFireball.atom | 0 .../Resources/DaringFireball.html | 0 .../Resources/DaringFireball.json | 0 .../Resources/DaringFireball.rss | 0 .../Resources/EMarley.rss | 0 .../Resources/KatieFloyd.rss | 0 .../Resources/OneFootTsunami.atom | 0 .../Resources/ScriptingNews.json | 0 .../Resources/Subs.opml | 0 .../Resources/SubsNoTitleAttributes.opml | 0 .../Resources/YouTubeTheVolvoRocks.html | 0 .../Resources/aktuality.rss | 0 .../Resources/allthis-partial.json | 0 .../Resources/allthis.atom | 0 .../Resources/allthis.json | 0 .../Resources/atp.rss | 0 .../Resources/authors.json | 0 .../Resources/bio.rdf | 0 .../Resources/cloudblog.rss | 0 .../Resources/coco.html | 0 .../Resources/curt.json | 0 .../Resources/dcrainmaker.xml | 0 .../Resources/donthitsave.xml | 0 .../Resources/expertopinionent.atom | 0 .../Resources/furbo.html | 0 .../Resources/inessential.html | 0 .../Resources/inessential.json | 0 .../Resources/kc0011.rss | 0 .../Resources/livemint.xml | 0 .../Resources/macworld.rss | 0 .../Resources/manton.rss | 0 .../Resources/monkeydom.rss | 0 .../Resources/natasha.xml | 0 .../Resources/phpxml.rss | 0 .../Resources/pxlnv.json | 0 .../Resources/rose.json | 0 .../Resources/russcox.atom | 0 .../Resources/scriptingNews.rss | 0 
.../Resources/sixcolors.html | 0 .../Resources/theomnishow.rss | 0 61 files changed, 10 insertions(+), 2470 deletions(-) delete mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/FeedParserTests.xcscheme delete mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme delete mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme rename Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/{DateParserTests.xcscheme => ParserTests.xcscheme} (91%) delete mode 100755 Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/AtomParserTests.swift (100%) rename Modules/Parser/Tests/{DateParserTests => ParserTests}/DateParserTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/EntityDecodingTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/FeedParserTypeTests.swift (94%) rename Modules/Parser/Tests/{HTMLParserTests => ParserTests}/HTMLLinkTests.swift (75%) rename Modules/Parser/Tests/{HTMLParserTests => ParserTests}/HTMLMetadataTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Info.plist (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/JSONFeedParserTests.swift (100%) rename Modules/Parser/Tests/{OPMLParserTests => ParserTests}/OPMLTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/ParserTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/RSSInJSONParserTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/RSSParserTests.swift (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/3960.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/489.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/4fsodonline.atom (100%) rename 
Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/DaringFireball.atom (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/DaringFireball.html (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/DaringFireball.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/DaringFireball.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/EMarley.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/KatieFloyd.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/OneFootTsunami.atom (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/ScriptingNews.json (100%) rename Modules/Parser/Tests/{OPMLParserTests => ParserTests}/Resources/Subs.opml (100%) rename Modules/Parser/Tests/{OPMLParserTests => ParserTests}/Resources/SubsNoTitleAttributes.opml (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/YouTubeTheVolvoRocks.html (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/aktuality.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/allthis-partial.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/allthis.atom (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/allthis.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/atp.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/authors.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/bio.rdf (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/cloudblog.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/coco.html (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/curt.json (100%) rename 
Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/dcrainmaker.xml (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/donthitsave.xml (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/expertopinionent.atom (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/furbo.html (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/inessential.html (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/inessential.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/kc0011.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/livemint.xml (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/macworld.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/manton.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/monkeydom.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/natasha.xml (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/phpxml.rss (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/pxlnv.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/rose.json (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/russcox.atom (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/scriptingNews.rss (100%) rename Modules/Parser/Tests/{HTMLParserTests => ParserTests}/Resources/sixcolors.html (100%) rename Modules/Parser/Tests/{FeedParserTests => ParserTests}/Resources/theomnishow.rss (100%) diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/FeedParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/FeedParserTests.xcscheme deleted file mode 100644 index 7f09db509..000000000 --- 
a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/FeedParserTests.xcscheme +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme deleted file mode 100644 index 64ea056f7..000000000 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/HTMLParserTests.xcscheme +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme deleted file mode 100644 index 5b3a92420..000000000 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParserTests.xcscheme +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/DateParserTests.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/ParserTests.xcscheme similarity index 91% rename from Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/DateParserTests.xcscheme rename to Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/ParserTests.xcscheme index 3580eebfb..f202d73f0 100644 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/DateParserTests.xcscheme +++ b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/ParserTests.xcscheme @@ -18,9 +18,9 @@ skipped = "NO"> diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index bf3c6d23e..390026a3f 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -74,21 +74,15 @@ let package = Package( .enableExperimentalFeature("StrictConcurrency") ]), .testTarget( - name: "FeedParserTests", - dependencies: ["FeedParser"], + name: "ParserTests", + dependencies: [ + "FeedParser", + "OPMLParser", + "DateParser", + "HTMLParser" + ], exclude: ["Info.plist"], resources: 
[.copy("Resources")]), - .testTarget( - name: "OPMLParserTests", - dependencies: ["OPMLParser"], - resources: [.copy("Resources")]), - .testTarget( - name: "HTMLParserTests", - dependencies: ["HTMLParser"], - resources: [.copy("Resources")]), - .testTarget( - name: "DateParserTests", - dependencies: ["DateParser"]) ] ) diff --git a/Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss b/Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss deleted file mode 100755 index ba3d1400a..000000000 --- a/Modules/Parser/Tests/OPMLParserTests/Resources/DaringFireball.rss +++ /dev/null @@ -1,2278 +0,0 @@ - - -Daring Fireball -By John Gruber - - -http://daringfireball.net/feeds/main - -2016-02-28T21:06:52ZCopyright © 2016, John Gruber - Apple Product Event: Monday March 21 - - - - tag:daringfireball.net,2016:/linked//6.32173 - 2016-02-27T21:59:47Z - 2016-02-27T22:39:17Z - - John Gruber - http://daringfireball.net/ - - Kara Swisher, writing at Recode, broke the news:

- -
-

Attention Apple nerds, investors, media and everyone else who -needs to know when Tim Cook’s next product event is going to be -held: It’s going to be the week of March 21.

- -

Or to put it another way, it’s not going to be on March 15, the -time frame that other outlets previously reported, according to -several sources. It is not clear if the event was moved or if this -was the same timing as Apple had always planned.

-
- -

Swisher doesn’t have the exact date, although the <title> tag on her story reads “Apple Product Event Will Be Held March 22”. John Paczkowski (who usually gets these leaks first), confirms the week change, and says the event will be on Monday 21 March:

- -
-

Sources in position to know say the company has settled on March -21st as the date it will show off a handful of new products. These -people declined to say why Apple postponed the date by a week, but -it’s worth noting that it is one day prior to the company’s March -22 showdown with the government over a motion to compel it to help -hack the iPhone used by one of the San Bernardino terrorists.

-
- -

For what it’s worth, last year’s March event was on a Monday as well.

- -

Update: Jim Dalrymple:

- -
-

This sounds right to me.

-
- - - - ]]> - - Manuscripts and Findings - - - - tag:daringfireball.net,2016:/linked//6.32172 - 2016-02-27T00:11:11Z - 2016-02-27T00:13:11Z - - John Gruber - http://daringfireball.net/ - - My thanks to Nucleobytes for sponsoring this week’s DF RSS feed. Nucleobytes is a fascinating company. They specialize in creating Mac and iOS software for scientists and researchers, and they do it with great style — their apps have won multiple Apple Design Awards.

- -

Their latest creations are two apps for researchers, useful for anyone who researches anything from lab results, cooking recipes, or research for blog posts: Manuscripts and Findings.

- -
    -
  • Manuscripts is a writing tool that helps you concentrate on your story. Outline, plan and edit your project, insert figures, tables and math, then format citations using a killer workflow. Manuscripts supports both importing and exporting Markdown, Word, LaTeX, and HTML.

  • -
  • Findings is a lab notebook app that helps you keep a journal of your research, connected to notes, photos, and files. Plan your week, track progress, and share your findings with your colleagues or the world.

  • -
- -

Try the free basic versions, and use coupon DARINGFIREBALL for a special discount on the unlimited versions, this week only. (They have an even better offer for students.)

- - - - ]]>
-
- Donald Trump Vows to ‘Open Up’ Libel Laws - - - - tag:daringfireball.net,2016:/linked//6.32171 - 2016-02-26T21:47:27Z - 2016-02-26T21:47:28Z - - John Gruber - http://daringfireball.net/ - - Hadas Gold, writing for Politico:

- -
-

During a rally in Fort Worth, Texas, Trump began his usual tirade -against newspapers such as The New York Times and The Washington -Post, saying they’re “losing money” and are “dishonest.” The -Republican presidential candidate then took a different turn, -suggesting that when he’s president they’ll “have problems.”

- -

“One of the things I’m going to do if I win, and I hope we do and -we’re certainly leading. I’m going to open up our libel laws so -when they write purposely negative and horrible and false -articles, we can sue them and win lots of money. We’re going to -open up those libel laws. So when The New York Times writes a hit -piece which is a total disgrace or when The Washington Post, which -is there for other reasons, writes a hit piece, we can sue them -and win money instead of having no chance of winning because -they’re totally protected,” Trump said.

-
- -

Not worrisome at all. No sir.

- - - - ]]>
-
- Most Android Phones Are Not Encrypted - - - - tag:daringfireball.net,2016:/linked//6.32170 - 2016-02-26T17:43:11Z - 2016-02-28T21:06:52Z - - John Gruber - http://daringfireball.net/ - - Jose Pagliery, writing for CNN Money:

- -
-

Although 97% of Android phones have encryption as an option, less -than 35% of them actually got prompted to turn it on when they -first activated the phone. Even then, not everybody chooses that -extra layer of security.

- -

A Google spokesman said that encryption is now required for all -“high-performing devices” — like the Galaxy S7 — running the -latest version of Android, Marshmallow. But only 1.2% of Android -phones even have that version, according to Google.

- -

By comparison, most Apple products are uniformly secure: 94% of -iPhones run iOS 8 or 9, which encrypt all data. Apple (AAPL, -Tech30) makes its devices, designs the software, and retains full -control of the phone’s operating system.

- -

“If a person walks into a Best Buy and walks out with an iPhone, -it’s encrypted by default. If they walk out with an Android phone, -it’s largely vulnerable to surveillance,” said Christopher -Soghoian, the principal technologist at the American Civil -Liberties Union.

-
- -

Google is moving in the right direction, but here’s an area where the slow uptake of new versions of Android has a serious effect.

- - - - ]]>
-
- 9to5Mac: ‘Apple Likely to Drop the “5”, Call New 4-Inch Model the “iPhone SE”’ - - - - tag:daringfireball.net,2016:/linked//6.32169 - 2016-02-26T17:24:11Z - 2016-02-26T18:32:34Z - - John Gruber - http://daringfireball.net/ - - Mark Gurman:

- -
-

In January, we reported that Apple is preparing a new 4-inch -iPhone that is essentially 2013’s iPhone 5s with upgraded -internals. At the time, we heard that Apple would call the device -the “iPhone 5se” based on it being both an enhanced and “special -edition” version of the iPhone 5s. Now, we are hearing that Apple -appears to be going all in on the special edition factor: sources -say that Apple has decided to drop the “5” from the device’s name -and simply call it the “iPhone SE.” This will mark the first -iPhone upgrade without a number in its name and would logically -remove it from a yearly update cycle.

-
- -

A few points:

- -
    -
  • Apple was never going to call this phone the “5 SE”. I don’t know where Gurman got that, but that was never going to happen. Why would Apple give a new phone a name that makes it sound old?

  • -
  • Isn’t it more accurate to think of this as an iPhone 6S in a 4-inch body than as an iPhone 5S with “upgraded internals”? Other than the display, aren’t the “internals” the defining characteristics of any iPhone?

  • -
  • Dropping the number entirely fits with my theory that this phone is intended to remain on the market for 18-24 months.

  • -
- - - - ]]>
-
- Gogo Wi-Fi and Email Security - - - - tag:daringfireball.net,2016:/linked//6.32168 - 2016-02-26T17:12:34Z - 2016-02-26T19:00:17Z - - John Gruber - http://daringfireball.net/ - - Reporter Steven Petrow published a scary first-hand tale in USA Today, claiming that his email was hacked by another passenger on a Gogo-enabled flight. The implication was that you shouldn’t use email on Gogo unless you’re using a VPN.

- -

But Petrow’s email didn’t get intercepted because of some flaw with Gogo. It got intercepted because he wasn’t connecting to the POP or SMTP servers via SSL. In fact, his email provider, Earthlink, doesn’t even support SSL for email.

- -

Robert Graham at Errata Security explains:

- -
-

Early Internet stuff wasn’t encrypted, because encryption was -hard, and it was hard for bad guys to tap into wires to eavesdrop. -Now, with open WiFi hotspots at Starbucks or on the airplane, it’s -easy for hackers to eavesdrop on your network traffic. -Simultaneously, encryption has become a lot easier. All new -companies, those still fighting to acquire new customers, have -thus upgraded their infrastructure to support encryption. Stagnant -old companies, who are just milking their customers for profits, -haven’t upgraded their infrastructure.

- -

You see this in the picture below. Earthlink supports older -un-encrypted “POP3” (for fetching email from the server), but not -the new encrypted POP3 over SSL. Conversely, GMail doesn’t support -the older un-encrypted stuff (even if you wanted it to), but only -the newer encrypted version.

-
- -

Gogo is far from perfect, but it certainly wasn’t at fault in this case.

- -

Update: Like a lot of you, I’m not even sure I buy the whole story. Whole thing seems fishy.

- - - - ]]>
-
- Google, Facebook, Twitter, and Microsoft Plan to Support Apple - - - - tag:daringfireball.net,2016:/linked//6.32167 - 2016-02-25T22:56:47Z - 2016-02-25T22:56:48Z - - John Gruber - http://daringfireball.net/ - - Deepa Seetharaman and Jack Nicas, reporting for the WSJ:

- -
-

Several tech companies, including Google parent Alphabet Inc., -Facebook Inc. and Microsoft Corp., plan to file a joint motion -supporting Apple Inc. in its court fight against the Justice -Department over unlocking an alleged terrorist’s iPhone, according -to people familiar with the companies’ plans.

- -

At least one other tech company plans to be included in a joint -amicus brief next week generally supporting Apple’s position that -unlocking the iPhone would undermine tech firms’ efforts to -protect their users’ digital security, these people said. Twitter -Inc. also plans to support Apple in a motion, though it is unclear -if it will join the combined filing, another person familiar said.

- -

Microsoft President and Chief Legal Officer Brad Smith told -Congress on Thursday that his company would file a motion -supporting Apple.

-
- -

Nice.

- - - - ]]>
-
- Apple’s Motion to Vacate FBI Order - - - - tag:daringfireball.net,2016:/linked//6.32166 - 2016-02-25T20:24:56Z - 2016-02-25T20:25:28Z - - John Gruber - http://daringfireball.net/ - - A clear, cogent read. I often shy away from reading legal motions because they’re so often written in dense legalese, but this one is clear.

- -

This stuck out to me:

- -
-

Congress knows how to impose a duty on third parties to facilitate -the government’s decryption of devices. Similarly, it knows -exactly how to place limits on what the government can require of -telecommunications carriers and also on manufacturers of telephone -equipment and handsets. And in CALEA, Congress decided not to -require electronic communication service providers, like Apple, to -do what the government seeks here. Contrary to the government’s -contention that CALEA is inapplicable to this dispute, Congress -declared via CALEA that the government cannot dictate to providers -of electronic communications services or manufacturers of -telecommunications equipment any specific equipment design or -software configuration.

- -

In the section of CALEA entitled “Design of features and systems -configurations,” 47 U.S.C. § 1002(b)(1), the statute says that it -“does not authorize any law enforcement agency or officer —

- -
-

(1) to require any specific design of equipment, facilities, - services, features, or system configurations to be adopted by - any provider of a wire or electronic communication service, - any manufacturer of telecommunications equipment, or any - provider of telecommunications support services.

- -

(2) to prohibit the adoption of any equipment, facility, service, - or feature by any provider of a wire or electronic - communication service, any manufacturer of telecommunications - equipment, or any provider of telecommunications support - services.

-
-
- -

What Apple is arguing is that the All Writs Act is intended only to fill the gaps covering scenarios not covered by other laws, but CALEA (the Communications Assistance for Law Enforcement Act) is a law that was passed specifically to cover exactly this sort of scenario. This strikes me as a very compelling argument.

- - - - ]]>
-
- Microsoft Will File Amicus Brief Supporting Apple - - - - tag:daringfireball.net,2016:/linked//6.32165 - 2016-02-25T18:59:14Z - 2016-02-25T18:59:15Z - - John Gruber - http://daringfireball.net/ - - Dina Bass, reporting for Bloomberg:

- -
-

Microsoft Corp. backs Apple Inc. in its fight with the U.S. -government over unlocking a terrorist’s iPhone, said President and -Chief Legal Officer Brad Smith.

- -

The company will file an amicus brief to support Apple next week, -Smith said at a congressional hearing to discuss the need for new -legislation to govern privacy, security and law enforcement in the -age of Internet-based cloud services.

-
- -

Nice.

- - - - ]]>
-
- Apple to Tighten iCloud Backup Encryption - - - - tag:daringfireball.net,2016:/linked//6.32164 - 2016-02-25T18:02:44Z - 2016-02-25T18:02:45Z - - John Gruber - http://daringfireball.net/ - - Tim Bradshaw, reporting for the Financial Times:

- -
-

Apple is working on new ways to strengthen the encryption of -customers’ iCloud backups in a way that would make it impossible -for the company to comply with valid requests for data from law -enforcement, according to people familiar with its plans.

- -

The move would bolster Apple customers’ security against hackers -but also frustrate investigators who are currently able to obtain -data from Apple’s servers through a court order. Apple has -complied with thousands of such orders in the past.

- -

Developing such technology is in some ways more complex than -adding the kind of device-level security that Apple introduced to -the iPhone in 2014 with its iOS 8 update.

- -

Building new protections that mean Apple no longer has access to -iCloud encryption keys may inconvenience some customers. Such a -change would most likely mean that customers who forget their -iCloud password may be left unable to access their photos, -contacts and other personal information that is backed up to -Apple’s systems.

-
- - - - ]]>
-
- The Dangerous All Writs Act Precedent in the Apple Encryption Case - - - - tag:daringfireball.net,2016:/linked//6.32163 - 2016-02-25T17:07:13Z - 2016-02-25T17:07:15Z - - John Gruber - http://daringfireball.net/ - - Amy Davidson, writing for The New Yorker:

- -
-

It is essential to this story that the order to Apple is not a -subpoena: it is issued under the All Writs Act of 1789, which says -that federal courts can issue “all writs necessary or appropriate -in aid of their respective jurisdictions and agreeable to the -usages and principles of law.” Read as a whole, this simply means -that judges can tell people to follow the law, but they have to do -so in a way that, in itself, respects the law. The Act was written -at a time when a lot of the mechanics of the law still had to be -worked out. But there are qualifications there: warnings about the -writs having to be “appropriate” and “agreeable,” not just to the -law but to the law’s “principles.” The government, in its use of -the writ now, seems to be treating those caveats as background -noise. If it can tell Apple, which has been accused of no -wrongdoing, to sit down and write a custom operating system for -it, what else could it do?

-
- -

Lost amid the technical debate over encryption is the legal debate over this incredibly broad application of the All Writs Act.

- - - - ]]>
-
- Twitter’s Missing Manual - - - - tag:daringfireball.net,2016:/linked//6.32162 - 2016-02-25T16:45:49Z - 2016-02-25T16:45:50Z - - John Gruber - http://daringfireball.net/ - - Eevee:

- -
-

Here, then, is a list of all the non-obvious things about Twitter -that I know. Consider it both a reference for people who aren’t up -to their eyeballs in Twitter, and an example of how these hidden -features can pile up. I’m also throwing in a couple notes on -etiquette, because I think that’s strongly informed by the shape -of the platform.

-
- - - - ]]>
-
- Sharp Accepts Foxconn Takeover Bid - - - - tag:daringfireball.net,2016:/linked//6.32161 - 2016-02-25T05:21:30Z - 2016-02-25T17:46:42Z - - John Gruber - http://daringfireball.net/ - - Huge news for both companies. Interesting for Apple, too.

- -

Update:

- -
-

A deal to take over Japanese electronics giant Sharp by Taiwanese -manufacturer Foxconn, has been thrown into question by a last -minute delay.

- -

Foxconn said it had received new information from Sharp which -needed to be clarified.

-
- -

Whoops.

- - - - ]]>
-
- The Next Step in iPhone Impregnability - - - - tag:daringfireball.net,2016:/linked//6.32160 - 2016-02-25T03:26:27Z - 2016-02-25T04:35:17Z - - John Gruber - http://daringfireball.net/ - - Matt Apuzzo and Katie Benner, reporting for the NYT:

- -
-

Apple engineers have already begun developing new security -measures that would make it impossible for the government to break -into a locked iPhone using methods similar to those now at the -center of a court fight in California, according to people close -to the company and security experts.

- -

If Apple succeeds in upgrading its security — and experts say it -almost surely will — the company would create a significant -technical challenge for law enforcement agencies, even if the -Obama administration wins its fight over access to data stored on -an iPhone used by one of the killers in last year’s San -Bernardino, Calif., rampage. The F.B.I. would then have to find -another way to defeat Apple security, setting up a new cycle of -court fights and, yet again, more technical fixes by Apple. […]

- -

Apple built its recent operating systems to protect customer -information. As its chief executive, Timothy D. Cook, wrote in a -recent letter to customers, “We have even put that data out of our -own reach, because we believe the contents of your iPhone are none -of our business.”

- -

But there is a catch. Each iPhone has a built-in troubleshooting -system that lets the company update the system software without -the need for a user to enter a password. Apple designed that -feature to make it easier to repair malfunctioning phones.

-
- -

The way the iPhone works today, when put into recovery mode you can restore the operating system without entering the device passcode. The only restriction is that the version of iOS to be installed must be properly signed by Apple.

- -

I just tried it here with my old iPhone 6, which had been turned off for weeks. I powered it up, but did not unlock it. I put it in recovery mode, and then updated it to iOS 9.3 beta 4. Then it restarted. Now it’s running iOS 9.3 beta 4, and I still have not unlocked it. All my data is still on the phone — but it’s running a new version of iOS, without my having unlocked it.

- -

What the FBI wants Apple to do is create (and sign) a new version of iOS that they can force the San Bernardino suspect’s phone to install as an update — and this new version of iOS will allow them to easily brute-force the passcode.

- -

I think what Apple is leaking here is that they’re going to change this (perhaps as soon as this year’s new iPhone 7), so that you can’t install a new version of iOS, even in recovery mode, without entering the device’s passcode. (I think they will also do the same for firmware updates to the code that executes on the Secure Enclave — it will require a passcode lock.)

- -

If you do a full restore, you can install a new version of the OS without the passcode, but this wipes the data. See also: Activation Lock, which allows you to bypass the passcode to completely wipe an iPhone, but requires you to sign into iCloud before you can use it.

- - - - ]]>
-
- Scalia in 1987: ‘The Constitution Sometimes Insulates the Criminality of a Few in Order to Protect the Privacy of Us All’ - - - - tag:daringfireball.net,2016:/linked//6.32159 - 2016-02-25T02:53:17Z - 2016-02-25T02:53:18Z - - John Gruber - http://daringfireball.net/ - - NYT report on a 6-3 Supreme Court decision in 1987:

- -
-

Justice Scalia’s opinion was forcefully denounced as an -unjustified obstacle to law enforcement in dissenting opinions by -Associate Justices Sandra Day O’Connor and Lewis F. Powell Jr. -Chief Justice Rehnquist joined in both of the dissents.

- -

Justice Scalia, however, said, “There is nothing new in the -realization that the Constitution sometimes insulates the -criminality of a few in order to protect the privacy of us -all.” […]

- -

Justice Scalia’s majority opinion today said that although the -search for weapons was lawful — a shot had just been fired through -the floor of the apartment, injuring a man below — the police were -not justified in moving the stereo components even slightly to -check the serial numbers without “probable cause” to believe they -were stolen. He thus affirmed a ruling by an Arizona appellate -court that the stereo components, which turned out to have been -stolen in an armed robbery, could not be used as evidence against -the occupant of the apartment.

- -

Associate Justice William J. Brennan Jr., the Court’s senior -member, who is its leading liberal, apparently assigned Justice -Scalia to write the majority opinion, which he joined. Under the -Supreme Court’s procedures, the Chief Justice assigns opinions -when he is in the majority. When the Chief Justice dissents, as -in the Arizona case, the senior member of the majority has -assignment power.

-
- -

Conservative judges, as a general rule, tend to side with law enforcement in search and seizure cases. Scalia was certainly a conservative, but by no means was he in lockstep with them.

- - - - ]]>
-
- ABC News Posts Extensive Interview With Tim Cook on FBI/iPhone Case - - - - tag:daringfireball.net,2016:/linked//6.32158 - 2016-02-25T00:59:51Z - 2016-02-25T02:17:14Z - - John Gruber - http://daringfireball.net/ - - Solid, thorough, and I think very fair interview by David Muir. Cook made his case about as well as it could be made — a passionate defense of civil liberties. It’s 30 minutes long and worth every minute of it.

- - - - ]]>
-
- Former Bush Administration Official Argues Supreme Court Should Count Scalia’s Vote in Pending Cases - - - - tag:daringfireball.net,2016:/linked//6.32157 - 2016-02-24T22:46:11Z - 2016-02-24T22:46:12Z - - John Gruber - http://daringfireball.net/ - - This is how we get from here to there.

- - - - ]]>
-
- David Ortiz Makes a Final Plea to Yankees Fans - - - - tag:daringfireball.net,2016:/linked//6.32156 - 2016-02-24T22:02:58Z - 2016-02-24T22:17:46Z - - John Gruber - http://daringfireball.net/ - - Kevin Kernan, writing for the NY Post:

- -
-

When Ortiz, 40, makes his final Yankee Stadium appearance on Sept. -29, this is what he wants, and it speaks volumes about Ortiz the -player, the competitor, the enemy, the star.

- -

“You know what I want most of all?” Big Papi told The Post on -Tuesday at JetBlue Park. “I would love it if the fans at Yankee -Stadium gave me a standing ovation.”

- -

That’s what he wants, and that would be the perfect tribute to -Ortiz, who owns 503 home runs.

-
- -

I would wholeheartedly join in that ovation. Great player, great rival, and his retirement really marks the end of the epic Yankees-Sox rivalry from the early 2000s. I would expect appearances from Derek Jeter, Mariano Rivera, Jorge Posada, and Joe Torre. Just thinking about it makes me want to buy tickets.

- - - - ]]>
-
- Spotify Moves Infrastructure to Google Cloud Platform - - - - tag:daringfireball.net,2016:/linked//6.32155 - 2016-02-24T03:01:04Z - 2016-02-24T03:01:06Z - - John Gruber - http://daringfireball.net/ - - You heard it here first: this presages Google acquiring Spotify. (I heard it from Om Malik first.)

- - - - ]]>
-
- Was Pew’s Polling Question on the Apple/FBI Debate Misleading? - - - - tag:daringfireball.net,2016:/linked//6.32154 - 2016-02-23T22:16:51Z - 2016-02-23T22:16:52Z - - John Gruber - http://daringfireball.net/ - - Mike Masnick, writing for TechDirt:

- -
-

The question asked was

- -
-

As you may know, RANDOMIZE: [the FBI has said that accessing the -iPhone is an important part of their ongoing investigation into -the San Bernardino attacks] while [Apple has said that unlocking -the iPhone could compromise the security of other users’ -information] do you think Apple [READ; RANDOMIZE]?

- -

(1) Should unlock the iPhone (2) Should not unlock the iPhone (3) - Don’t Know.

-
- -

But that’s not the issue in this case!

- -

As noted in the past, when it’s possible for Apple to get access -to data, it has always done so in response to lawful court orders. -That’s similar to almost every other company as well. This case is -different because it’s not asking Apple to “unlock the iPhone.” -The issue is that Apple cannot unlock the iPhone and thus, the FBI -has instead gotten a court order to demand that Apple create an -entirely new operating system that undermines the safety and -security of iPhones, so that the FBI can hack into the iPhone. -That’s a really different thing.

-
- -

He makes a good point. But when it comes to public polling on an issue like this, you can’t expect the public to understand the technical issues. Ideally, yes, the language used by Pew would have been much more precise. But basically what they were asking is “Do you think Apple should do whatever the FBI wants them to do to get the information from the San Bernardino suspect’s iPhone?” For polling purposes, I don’t think it matters much what “whatever” is.

- -

It’s true that if phrased differently, it’s quite possible you’d get a poll showing more support for Apple. But the bottom line is that a lot of Americans think Apple should just do what the FBI is asking them to do.

- - - - ]]>
-
- On Ribbons and Ribbon Cutters - - - - tag:daringfireball.net,2016:/linked//6.32153 - 2016-02-23T22:00:23Z - 2016-02-23T22:00:24Z - - John Gruber - http://daringfireball.net/ - - Jonathan Zdziarski (who has been killing it with his analysis of the Apple/FBI fight):

- -
-

With most non-technical people struggling to make sense of the -battle between FBI and Apple, Bill Gates introduced an excellent -analogy to explain cryptography to the average non-geek. Gates -used the analogy of encryption as a “ribbon around a hard drive”. -Good encryption is more like a chastity belt, but since Farook -decided to use a weak passcode, I think it’s fair here to call it -a ribbon. In any case, let’s go with Gates’s ribbon analogy. […]

- -

Instead of cutting the ribbon, which would be a much simpler task, -FBI is ordering Apple to invent a ribbon cutter — a forensic tool -capable of cutting the ribbon for FBI, and is promising to use it -on just this one phone. In reality, there’s already a line -beginning to form behind Comey should he get his way.

-
- - - - ]]>
-
- Apple to Restore UI Navigation With Pencil in Next iOS 9.3 Beta - - - - tag:daringfireball.net,2016:/linked//6.32152 - 2016-02-23T21:37:34Z - 2016-02-23T22:26:45Z - - John Gruber - http://daringfireball.net/ - - That didn’t take long. Apple, in a statement to iMore and a few other publications:

- -
-

“Apple Pencil has been a huge hit with iPad Pro users, who love it -for drawing, annotating and taking notes,” an Apple spokesperson -told iMore. “We believe a finger will always be the primary way -users navigate on an iPad, but we understand that some customers -like to use Apple Pencil for this as well and we’ve been working -on ways to better implement this while maintaining compatibility -during this latest beta cycle. We will add this functionality back -in the next beta of iOS 9.3.”

-
- -

One thing I take away from the vocal reaction to this: the Apple Pencil and iPad Pro have passionate users.

- - - - ]]>
-
- Apple vs. FBI: ‘Just This Once’? - - - - tag:daringfireball.net,2016:/linked//6.32151 - 2016-02-23T21:20:18Z - 2016-02-23T21:20:19Z - - John Gruber - http://daringfireball.net/ - - Julian Sanchez, writing for Just Security:

- -
-

Consider: Possibly the next iPhone simply eliminates Apple’s -ability to assist in any way. But it’s hard to imagine a -scenario where the designer and key-holder for a device designed -to be used by normal humans can do literally nothing, at the -margin, to assist an attacker. That means every improvement in -device security involves a gamble: Maybe the cost of developing -new ways to attack the newly hardened device becomes so high that -the courts recognize it as an “undue burden” and start quashing -(or declining to issue) All Writs Act orders to compel hacking -assistance. Maybe. But Apple is a very large, very rich company, -and much of the practical “burden” comes from the demands of -complying securely and at scale. The government will surely -continue arguing in future cases that the burden of complying -just this one time are not so great for a huge tech company like -Apple. (And, to quote The Smiths, they’ll never never do it -again — of course they won’t; not until the next time.)

-
- -

Sanchez makes an interesting point here about Apple being disincentivized from improving iPhone security if they lose this case. Imagine if Apple made safes, but the government could compel them to crack their own safes under warrant. The harder they make these safes to crack, the more work they bring upon themselves when compelled to crack them.

- -

I don’t think Apple would succumb to that and stop improving their device security, but it shows what an untenable position the government is trying to put Apple in. The only easy way out for Apple, if they lose, is to stop making iPhones truly secure.

- - - - ]]>
-
- High-Profile Attorney Ted Olson Joins Apple’s Fight Against FBI Terror Probe - - - - tag:daringfireball.net,2016:/linked//6.32150 - 2016-02-23T20:49:09Z - 2016-02-23T21:11:40Z - - John Gruber - http://daringfireball.net/ - - Taylor Goldenstein, reporting for the LA Times:

- -
-

Olson and Theodore J. Boutrous Jr. are the attorneys of record -representing Apple, according to a court filing. Boutrous and -Olson worked together to fight California’s previous ban on -same-sex marriage.

- -

Olson is best known for successfully arguing on behalf of George -W. Bush in the Supreme Court case Bush vs. Gore, which decided the -2000 presidential election, and for challenging California’s -Proposition 8, the measure that banned gay marriage, before the -Supreme Court.

-
- -

Olson is truly an extraordinary figure, both in terms of his career (winning landmark cases for conservatives, like Bush v. Gore and Citizens United; then winning the case that legalized gay marriage nationwide), and his personal life (his wife was a passenger on the plane that crashed into the Pentagon on 9/11).

- - - - ]]>
-
- iOS 9.3 Betas Remove the Ability to Navigate iPad UI With Apple Pencil - - - - tag:daringfireball.net,2016:/linked//6.32149 - 2016-02-23T19:24:01Z - 2016-02-23T21:39:01Z - - John Gruber - http://daringfireball.net/ - - Serenity Caldwell, at iMore:

- -
-

Unfortunately, whether by bug or intentional design, the Pencil’s -navigational prowess appears to have vanished in the iOS 9.3 -public betas. With 9.3, you can no longer scroll or manipulate -text; the only places the Pencil works are on canvas or when -pressing digital buttons.

- -

Normally, I don’t write about beta bugs and features, because it’s -a beta: There are always bugs, and features change. But this -functionality is important enough that I wanted to talk about it -before Apple submits its final 9.3 release. It could be a bug, -yes: But several betas in, we’ve seen fixes for Smart Connector -keyboards and new features, and the Pencil remains crippled. Which -makes me think, more and more, that this is a conscious decision -on the part of Apple’s engineering team. (I did reach out to the -company about the issue, and will update if and when I receive a -response.)

-
- -

Myke Hurley and CGP Grey talk about this on the latest episode of their podcast, Cortex. Grey says:

- -
-

Sources in the know confirm that removing the functionality of the -Apple Pencil is a decision inside of Apple. It is not a bug they -have overlooked for three betas. It is a decision.

-
- -

My only guess as to why Apple would change this is that they want to enable you to scroll/pan (with your finger) while drawing/marking-up with the Pencil. If so, the mistake wasn’t making this change in iOS 9.3 — the mistake was allowing the Pencil to control the UI in the first place.

- -

I hate to say it, but now that iPad Pro users have gotten used to using the Pencil to navigate the UI, maybe it should be a setting? Maybe under Accessibility? Grey, for example, says using the Pencil to navigate the UI helps him avoid RSI pain.

- -

Update, two hours later: Apple has told The Verge that UI navigation via Pencil will return in the next iOS 9.3 beta.

- - - - ]]>
-
- Bill Gates Breaks Ranks Over FBI Apple Request - - - - tag:daringfireball.net,2016:/linked//6.32148 - 2016-02-23T19:00:59Z - 2016-02-23T19:45:52Z - - John Gruber - http://daringfireball.net/ - - Stephen Foley and Tim Bradshaw, writing for The Financial Times:

- -
-

“This is a specific case where the government is asking for -access to information. They are not asking for some general -thing, they are asking for a particular case,” Mr Gates told the -Financial Times.

- -

“It is no different than [the question of] should anybody ever -have been able to tell the phone company to get information, -should anybody be able to get at bank records. Let’s say the bank -had tied a ribbon round the disk drive and said, ‘Don’t make me -cut this ribbon because you’ll make me cut it many times’.”

-
- -

Gates is so smart — surely he understands that if the FBI prevails, this will set precedent that will be used again and again. It seems to me he’s arguing that we should not be allowed to have devices protected by strong encryption.

- -

Update: Gates said today he thinks the FT mischaracterized his position, but I’m not really seeing it. He certainly isn’t siding with Apple — his stance seems, at best, lukewarm, like Sundar Pichai’s.

- - - - ]]>
-
- Poll Shows More Support for Justice Department Than for Apple - - - - tag:daringfireball.net,2016:/linked//6.32147 - 2016-02-23T18:54:44Z - 2016-02-24T03:22:36Z - - John Gruber - http://daringfireball.net/ - - Pew Research Center:

- -
-

As the standoff between the Department of Justice and Apple Inc. -continues over an iPhone used by one of the suspects in the San -Bernardino terrorist attacks, 51% say Apple should unlock the -iPhone to assist the ongoing FBI investigation. Fewer Americans -(38%) say Apple should not unlock the phone to ensure the security -of its other users’ information; 11% do not offer an opinion on -the question.

- -

News about a federal court ordering Apple to unlock the suspect’s -iPhone has registered widely with the public: 75% say they have -heard either a lot (39%) or a little (36%) about the situation.

-
- -

This is exactly why Apple’s stance on this issue is so commendable. They’re doing what they believe to be right, even though it is unpopular.

- - - - ]]>
-
- WSJ: ‘Justice Department Seeks to Force Apple to Extract Data From About 12 Other iPhones’ - - - - tag:daringfireball.net,2016:/linked//6.32146 - 2016-02-23T18:53:40Z - 2016-02-23T18:53:41Z - - John Gruber - http://daringfireball.net/ - - Devlin Barrett, reporting for the WSJ:

- -
-

The Justice Department is pursuing court orders to make Apple Inc. -help investigators extract data from iPhones in about a dozen -undisclosed cases around the country, in disputes similar to the -current battle over a terrorist’s locked phone, according to a -newly-unsealed court document.

- -

The other phones are evidence in cases where prosecutors have -sought, as in the San Bernardino, Calif., terror case, to use an -18th-century law called the All Writs Act to compel the company to -help them bypass the passcode security feature of phones that may -hold evidence, according to a letter from Apple which was unsealed -in Brooklyn federal court Tuesday. […]

- -

The letter doesn’t describe the specific types of criminal -investigations related to those phones, but people familiar with -them said they don’t involve terrorism cases. The 12 cases remain -in a kind of limbo amid the bigger, more confrontational legal -duel between the government and the company over an iPhone seized -in the terror case in California, these people said.

-
- -

But it’s really just about that one, single iPhone in the San Bernardino case.

- - - - ]]>
-
- ‘Absolutely Right’ - - - - tag:daringfireball.net,2016:/linked//6.32145 - 2016-02-23T18:36:27Z - 2016-02-23T18:39:51Z - - John Gruber - http://daringfireball.net/ - - Katie Benner and Matt Apuzzo, reporting for the NYT on whether the FBI’s request for Apple to unlock the San Bernardino shooter’s iPhone will open the door to more such requests:

- -
-

In a note posted to its website on Monday, Apple reiterated that -the government’s request seems narrow but really isn’t. “Law -enforcement agents around the country have already said they have -hundreds of iPhones they want Apple to unlock if the F.B.I. wins -this case,” the company said.

- -

To that point, the New York City police commissioner, William J. -Bratton, and the Manhattan district attorney, Cyrus R. Vance Jr., -criticized Apple after it refused to comply with the court order -and said that they currently possessed 175 iPhones that they could -not unlock.

- -

Charlie Rose recently interviewed Mr. Vance and asked if he would -want access to all phones that were part of a criminal proceeding -should the government prevail in the San Bernardino case.

- -

Mr. Vance responded: “Absolutely right.”

-
- - - - ]]>
-
- Mark Zuckerberg Stole Samsung’s Galaxy S7 Show - - - - tag:daringfireball.net,2016:/linked//6.32144 - 2016-02-23T03:46:31Z - 2016-02-23T03:46:32Z - - John Gruber - http://daringfireball.net/ - - Interesting marriage of convenience. Samsung has hardware but no interesting software. Facebook has interesting software but no hardware.

- - - - ]]>
-
- MDM Software Would Have Unlocked San Bernardino Shooter’s iPhone - - - - tag:daringfireball.net,2016:/linked//6.32143 - 2016-02-23T01:18:25Z - 2016-02-23T01:18:27Z - - John Gruber - http://daringfireball.net/ - - CBS News:

- -
-

If the technology, known as mobile device management, had been -installed, San Bernardino officials would have been able to -remotely unlock the iPhone for the FBI without the theatrics of a -court battle that is now pitting digital privacy rights against -national security concerns.

- -

The service costs $4 per month per phone.

- -

Instead, the only person who knew the unlocking passcode for the -phone is the dead gunman, Syed Farook, who worked as an inspector -in the county’s public health department.

-
- -

I had assumed they weren’t using MDM, but it’s good to have confirmation.

- - - - ]]>
-
- FBI Director James Comey Publishes Op-Ed on Apple/Encryption Case - - - - tag:daringfireball.net,2016:/linked//6.32141 - 2016-02-22T21:52:48Z - 2016-02-22T21:52:49Z - - John Gruber - http://daringfireball.net/ - - James Comey, in a brief op-ed published last night by Lawfare:

- -
-

The San Bernardino litigation isn’t about trying to set a -precedent or send any kind of message. It is about the victims and -justice. Fourteen people were slaughtered and many more had their -lives and bodies ruined. We owe them a thorough and professional -investigation under law. That’s what this is. The American people -should expect nothing less from the FBI.

-
- -

It is very difficult to take Comey’s opening sentence seriously. Everyone — on both sides of the issues — knows that this is about setting precedent.

- -
-

The particular legal issue is actually quite narrow. The relief we -seek is limited and its value increasingly obsolete because the -technology continues to evolve. We simply want the chance, with a -search warrant, to try to guess the terrorist’s passcode without -the phone essentially self-destructing and without it taking a -decade to guess correctly. That’s it. We don’t want to break -anyone’s encryption or set a master key loose on the land. I hope -thoughtful people will take the time to understand that. Maybe the -phone holds the clue to finding more terrorists. Maybe it doesn’t. -But we can’t look the survivors in the eye, or ourselves in the -mirror, if we don’t follow this lead.

-
- -

This is a purely emotional appeal. By Comey’s logic here, FBI agents should be considered above the law, able to pursue any and every avenue possible in the pursuit of information in a case with high stakes. That’s not how our system works. We are governed by the rule of law. Encryption is legal.

- -

Ultimately, that is where Comey and the FBI are going to take this. They’re going to try to make strong encryption illegal.

- - - - ]]>
-
- In Internal Email, Apple CEO Tim Cook Says Refusal to Unlock iPhone Is an Issue of Civil Liberties - - - - tag:daringfireball.net,2016:/linked//6.32140 - 2016-02-22T21:07:23Z - 2016-02-22T21:07:24Z - - John Gruber - http://daringfireball.net/ - - Tim Cook, in a company-wide memo:

- -
-

Apple is a uniquely American company. It does not feel right to be -on the opposite side of the government in a case centering on the -freedoms and liberties that government is meant to protect.

- -

Our country has always been strongest when we come together. We -feel the best way forward would be for the government to -withdraw its demands under the All Writs Act and, as some in -Congress have proposed, form a commission or other panel of -experts on intelligence, technology and civil liberties to -discuss the implications for law enforcement, national security, -privacy and personal freedoms. Apple would gladly participate in -such an effort.

-
- - - - ]]>
-
- Apple Publishes FAQ on Their Fight Against the FBI - - - - tag:daringfireball.net,2016:/linked//6.32139 - 2016-02-22T21:06:12Z - 2016-02-22T21:06:13Z - - John Gruber - http://daringfireball.net/ - - Cogent.

- - - - ]]>
-
- - - -tag:daringfireball.net,2016:/feeds/sponsors//11.32142 - - Daring Fireball Department of Commerce - - -2016-02-22T20:11:01-05:00 -2016-02-22T20:11:03-05:00 - -Everybody does research. Whether you investigate gravitational waves, do fact-finding for blog posts, study the genetics of Rhagoletis pomonella, or design the next revolutionary cupcake recipe, you are a researcher.

- -

Research needs great tools — that’s where we come in! We build innovative Mac and iOS apps for researchers, that even got us several Apple Design Awards. Today, we present our latest creations: Manuscripts and Findings.

- -
    -
  • Manuscripts is a writing tool that helps you concentrate on your story. Outline, plan and edit your project, insert figures, tables and math, then format citations using a killer workflow. Import and export of Markdown, Word, LaTeX and HTML is included.

  • -
  • Findings is a lab notebook app that helps you keep a journal of your research, connected to notes, photos and files. Plan your week, track progress, and share your findings with your colleagues… or the world.

  • -
- -

Try the free basic versions, and use coupon DARINGFIREBALL for a special discount on the unlimited versions, this week only. We hope you’ll like Manuscripts & Findings!

- -]]>
-[Sponsor] Manuscripts and Findings: Your Research Is Ready for a Big Upgrade
- The Talk Show: ‘iTools or Whatever’ - - - - tag:daringfireball.net,2016:/linked//6.32138 - 2016-02-21T23:15:40Z - 2016-02-21T23:15:43Z - - John Gruber - http://daringfireball.net/ - - For your enjoyment, a new episode of my podcast, with special guest Jim Dalrymple. Topics include the Apple/FBI legal showdown, the debate over Apple software quality, and more.

- -

Sponsored by:

- -
    -
  • Squarespace: Build it beautiful. Use code GRUBER for 10% off your first order.
  • -
  • Fracture: Photos printed in vivid color directly on glass. Use promo code TALKSHOW10 for 10% off your first order.
  • -
  • Harry’s: An exceptional shave at a fraction of the price. Use code TALKSHOW for $5 off your first purchase.
  • -
- - - - ]]>
-
- - - - tag:daringfireball.net,2016://1.32137 - 2016-02-21T22:05:40Z - 2016-02-21T22:45:23Z - - John Gruber - http://daringfireball.net/ - -The key point is that you do not have to unlock an iPhone to have it back up to iCloud. But a locked iPhone can’t back up to iCloud if the associated Apple ID password has been changed.

-]]>
- The latest news in the Apple-FBI legal fight has resulted in much confusion. John Paczkowski, reporting for BuzzFeed:

- -
-

The FBI has claimed that the password was changed by someone at -the San Bernardino Health Department. Friday night, however, -things took a further turn when the San Bernardino County’s -official Twitter account stated, “The County was working -cooperatively with the FBI when it reset the iCloud password at -the FBI’s request.”

- -

County spokesman David Wert told BuzzFeed News on Saturday -afternoon the tweet was an authentic statement, but he had nothing -further to add.

- -

The Justice Department did not respond to requests for comment on -Saturday; an Apple spokesperson said the company had no additional -comment beyond prior statements.

-
- -

Here is what the FBI wrote in its legal motion, in a footnote on the four ways Apple suggested they obtain the data they seek:

- -
-

(3) to attempt an auto-backup of the SUBJECT DEVICE with the - related iCloud account (which would not work in this case - because neither the owner nor the government knew the password - the iCloud account, and the owner, in an attempt to gain - access to some information in the hours after the attack, was - able to reset the password remotely, but that had the effect - of eliminating the possibility of an auto-backup);

-
- -

To unpack this, the “owner” is not Syed Farook, the shooter. The iPhone at the center of this was supplied by Farook’s employer, the San Bernardino County Department of Public Health. They are the “owner”. The “government” is the federal government: the FBI and the Department of Justice.

- -

The iPhone had been configured to back up to iCloud. However, at the time of the attack, it had not been backed up to iCloud for six weeks. Under warrant, Apple supplied the FBI with the data from that six-week-old backup. The FBI (for obvious reasons) would like the most recent six weeks of data from the phone, too.1

- -

iCloud backups are triggered automatically when the phone is (a) on a known Wi-Fi network, and (b) plugged-in to power. Apple’s suggestion to the FBI was that if they took the iPhone to Farook’s office and plugged it in, it might trigger a backup. If that had worked, Apple could supply the FBI with the contents of that new backup, including the most recent six weeks of data.

- -

It is not clear to me from any of the reports I have read why the iPhone had not been backed up in six weeks. It’s possible that Farook had disabled iCloud backups, in which case this whole thing is moot.2 But it’s also possible the only reason the phone hadn’t been backed up in six weeks is that it had not been plugged-in while on a known Wi-Fi network in six weeks. The phone would have to be unlocked to determine this, and the whole point of this fight is that the phone can’t be unlocked.

- -

The FBI screwed this up by directing the San Bernardino County Department of Public Health to reset Farook’s Apple ID password. They did not, and apparently could not, change anything on the phone itself. But once they reset the Apple ID password, the phone could not back up to iCloud, because the phone needed to be updated with the newly-reset Apple ID password — and they could not do that because they can’t unlock the phone.

- -

The key point is that you do not have to unlock an iPhone to have it back up to iCloud. But a locked iPhone can’t back up to iCloud if the associated Apple ID password has been changed.

- -

Again, there are two password-type things at play here. The Apple ID (iCloud) password, and the four-digit device passcode locking the iPhone. The county, at the behest of the FBI, reset the Apple ID password. This did not allow them to unlock the iPhone, and, worse, it prevented the iPhone from initiating a new backup to iCloud.

- -

How did the county reset Farook’s Apple ID password? We don’t know for sure, but the most likely answer is that if his Apple ID was his work-issued email account, then the IT department at the county could go to iforgot.apple.com, enter Farook’s work email address, and then access his email account to click the confirmation URL to reset the password.

- -

In short:

- -
    -
  • The data the FBI claims to want is on Farook’s iPhone.
  • -
  • They already have access to his iCloud account.
  • -
  • They might have been able to transfer the data on his iPhone to his iCloud account via an automated backup, but they can’t because they reset his Apple ID (iCloud) password.
  • -
- -

The only possible explanations for this are incompetence or dishonesty on the part of the FBI. Incompetence, if they didn’t realize that resetting the Apple ID password could prevent the iPhone from backing up to iCloud. Dishonesty, if they directed the county to do this knowing the repercussions, with the goal of setting up this fight to force Apple to create a back door for them in iOS. I’m not sure which to believe at this point. I’d like to know exactly when this directive to reset the Apple ID password was given — “in the hours after the attack” leaves a lot of wiggle room.

- -
-
-
    -
  1. -

    Much (or all?) of the data stored on Apple’s iCloud backup servers is not encrypted. Or, if it is encrypted, it is encrypted in a way that Apple can decrypt. Apple has a PDF that describes the information available to U.S. law enforcement from iCloud, but to me it’s not clear exactly what is available under warrant. I would bet a large sum of money that Apple is hard at work on an iCloud backup system that does store data encrypted in a way that Apple cannot read it without the user’s Apple ID password. ↩︎

    -
  2. -
  3. -

    Another possibility: Farook’s iCloud storage was full. If this were the case, presumably Apple could have granted his account additional storage to allow a fresh backup to occur. But again, this became moot as soon as the county reset the Apple ID password at the behest of the FBI. ↩︎︎

    -
  4. -
-
- - - - ]]>
- ★ On the San Bernardino Suspect’s Apple ID Password Reset
- White House Petition to Side With Apple in FBI Fight - - - - tag:daringfireball.net,2016:/linked//6.32136 - 2016-02-21T21:38:07Z - 2016-02-21T21:38:09Z - - John Gruber - http://daringfireball.net/ - - I don’t have high hopes for this (the Obama administration seems hopelessly tied to law enforcement on this subject), but I signed:

- -
-

The FBI, is demanding that Apple build a “backdoor” to bypass -digital locks protecting consumer information on Apple’s popular -iPhones.

- -

We the undersigned, oppose this order, which has implications far -beyond the legal case at hand.

-
- - - - ]]>
-
- New York Times Publishes Report on iPhone Security and China - - - - tag:daringfireball.net,2016:/linked//6.32135 - 2016-02-20T22:36:43Z - 2016-02-20T22:39:02Z - - John Gruber - http://daringfireball.net/ - - Katie Benner and Paul Mozur, reporting for the NYT and revisiting the topic excised from a report earlier this week:

- -
-

In China, for example, Apple — like any other foreign company -selling smartphones — hands over devices for import checks by -Chinese regulators. Apple also maintains server computers in -China, but Apple has previously said that Beijing cannot view the -data and that the keys to the servers are not stored in China. In -practice and according to Chinese law, Beijing typically has -access to any data stored in China.

- -

If Apple accedes to American law enforcement demands for opening -the iPhone in the San Bernardino case and Beijing asks for a -similar tool, it is unlikely Apple would be able to control -China’s use of it. Yet if Apple were to refuse Beijing, it would -potentially face a battery of penalties.

- -

Analysts said Chinese officials were pushing for greater control -over the encryption and security of computers and phones sold in -the country, though Beijing last year backed off on some proposals -that would have required foreign companies to provide encryption -keys for devices sold in the country after facing pressure from -foreign trade groups.

- -

“People tend to forget the global impact of this,” said Raman Jit -Singh Chima, policy director at Access Now, a nonprofit that works -for Internet freedoms. “The reality is the damage done when a -democratic government does something like this is massive. It’s -even more negative in places where there are fewer freedoms.”

-
- -

Another way to look at this is a choice between the lesser of two evils. Is it a bad thing if law enforcement loses access to the contents of cell phones as state of the art for security increases? Yes. But it would be far, far worse — for entirely different reasons — if we eliminate true security by mandating back doors.

- - - - ]]>
-
- San Bernardino Officials: Apple ID Password for Terrorist’s iPhone Reset at FBI Request - - - - tag:daringfireball.net,2016:/linked//6.32134 - 2016-02-20T22:14:38Z - 2016-02-20T23:45:31Z - - John Gruber - http://daringfireball.net/ - - This story keeps getting weirder. John Paczkowski, at BuzzFeed:

- -
-

The FBI has claimed that the password was changed by someone at -the San Bernardino Health Department. Friday night, however, -things took a further turn when the San Bernardino County’s -official Twitter account stated, “The County was working -cooperatively with the FBI when it reset the iCloud password at -the FBI’s request.”

- -

County spokesman David Wert told BuzzFeed News on Saturday -afternoon the tweet was an authentic statement, but he had nothing -further to add.

- -

The Justice Department did not respond to requests for comment on -Saturday; an Apple spokesperson said the company had no additional -comment beyond prior statements.

-
- -

The additional wrinkle here is that when the FBI first revealed this, in this footnote (screenshot) of their legal motion (whole motion linked above, on “claimed”), they strongly implied that the San Bernardino Health Department did this on their own, like they were a bunch of yokels who panicked and did the wrong thing. Instead, it turns out, they were following the FBI’s instructions.

- -

The FBI says this happened “in the hours after the attack”. My question: How many hours?

- - - - ]]>
-
- DevMate by MacPaw - - - - tag:daringfireball.net,2016:/linked//6.32133 - 2016-02-20T00:23:57Z - 2016-02-20T00:23:58Z - - John Gruber - http://daringfireball.net/ - - My thanks to MacPaw for sponsoring this week’s DF RSS feed to announce that their developer platform DevMate is now available free of charge. DevMate is a single SDK that provides a slew of back-end services for Mac developers: in-app purchasing, software licensing, update delivery, crash reports, user feedback, and more. Plus real-time analytics, with sales and downloads, are available from DevMate’s dashboard.

- -

Among the indie Mac developers using DevMate for their apps are MacPaw themselves (for CleanMyMac), Smile Software, and Realmac. It’s a robust, dependable solution for developers who want to sell their Mac apps outside the App Store.

- - - - ]]>
-
- More Mac App Store Certificate Problems - - - - tag:daringfireball.net,2016:/linked//6.32132 - 2016-02-20T00:12:10Z - 2016-02-20T00:19:07Z - - John Gruber - http://daringfireball.net/ - - Lost amid the FBI/iPhone encryption hubbub was another bad week for the Mac App Store — apps just stopped launching, with the only solution being to delete the app(s) and re-install from the store. Michael Tsai (as usual) compiled a thorough roundup of information and commentary.

- - - - ]]>
-
- - - - tag:daringfireball.net,2016://1.32054 - 2016-02-04T01:28:15Z - 2016-02-04T16:33:43Z - - John Gruber - http://daringfireball.net/ - -Maybe we expect too much. But Apple’s hardware doesn’t have little problems like this.

-]]>
- Following up on Walt Mossberg’s column regarding the quality of Apple’s first-party apps, Jim Dalrymple writes:

- -
-

I understand that Apple has a lot of balls in the air, but they -have clearly taken their eye off some of them. There is absolutely -no doubt that Apple Music is getting better with each update to -the app, but what we have now is more of a 1.0 version than what -we received last year.

- -

Personally, I don’t care much about all the celebrities that Apple -can parade around — I care about a music service that works. -That’s it.

- -

If Apple Music (or any of the other software that has -problems) was the iPhone, it would never have been released in -the state it was.

-
- -

Software and hardware are profoundly different disciplines, so it’s hard to compare them directly. But it seems obvious to me that Apple, institutionally, has higher standards for hardware design and quality than it does for software.

- -

Maybe this is the natural result of the fact hardware standards must be high, because they can’t issue “hardware updates” over the air like they can with software. But the perception is now widespread that the balance between Apple’s hardware and software quality has shifted in recent years. I see a lot of people nodding their heads in agreement with Mossberg and Dalrymple’s pieces today.

- -

We went over this same ground a year ago in the wake of Marco Arment’s “Apple Has Lost the Functional High Ground”, culminating in a really interesting (to me at least) discussion with Phil Schiller at my “Live From WWDC” episode of The Talk Show. That we’re still talking about it a year later — and that the consensus reaction is one of agreement — suggests that Apple probably does have a software problem, and they definitely have a perception problem.

- -

I’ll offer a small personal anecdote. Overall I’ve had great success with iCloud Photo Library. I’ve got over 18,000 photos and almost 400 videos. And I’ve got a slew of devices — iPhones, iPads, and Macs — all using the same iCloud account. And those photos are available from all those devices. Except, a few weeks ago, I noticed that on my primary Mac, in Photos, at the bottom of the main “Photos” view, where it tells you exactly how many photos and videos you have, it said “Unable to Upload 5 Items”. Restarting didn’t fix it. Waiting didn’t fix it. And clicking on it didn’t do anything — I wanted to know which five items couldn’t be uploaded, and why. It seems to me that anybody in this situation would want to know those two things. But damned if Photos would tell me.

- -

Eventually, I found this support thread which suggested a solution: you can create a Smart Group in Photos using “Unable to upload to iCloud Photo Library” as the matching condition. Bingo: five items showed up. (Two of them were videos for which the original files couldn’t be found; three of them were duplicates of photos that were already in my library.)

- -

My little iCloud Photo Library syncing hiccup was not a huge deal — I was even lucky insofar as the two videos that couldn’t be found were meaningless. And I managed to find a solution. But it feels emblematic of the sort of nagging software problems people are struggling with in Apple’s apps. Not even the bug itself that led to these five items being unable to upload, but rather the fact that Photos knew about the problem but wouldn’t tell me the details I needed to fix it without my resorting to the very much non-obvious trick of creating a Smart Group to identify them. For me at least, “silent failure” is a big part of the problem — almost everything related to the whole discoveryd/mDNSresponder fiasco last year was about things that just silently stopped working.

- -

Maybe we expect too much from Apple’s software. But Apple’s hardware doesn’t have little problems like this.

- - - - ]]>
- ★ Apple’s App Problem
- - - - tag:daringfireball.net,2016://1.31994 - 2016-01-21T00:00:17Z - 2016-01-21T00:18:18Z - - John Gruber - http://daringfireball.net/ - -A year ago Apple sold 75 million iPhones in the fourth quarter of calendar 2014. There is no facility in the U.S. that can do that. There might not be anywhere in the world other than China that can operate at that sort of scale.

-]]>
- Arik Hesseldahl, writing for Recode on Donald Trump’s “we’re gonna get Apple to start building their damn computers and things in this country, instead of in other countries” campaign promise:

- -
-

Any honest presidential candidate regardless of party should say -clearly and indeed proudly that America doesn’t want these jobs to -come back. Final assembly jobs are low-skilled, low-paying -occupations; no American would wish to support a family on what -the jobs would pay. Workers at China’s Foxconn, which -manufacturers the iPhone, make about $402 per month after three -months of on-the-job probation. Even at the lowest minimum wage in -the U.S. — $5.15 an hour in Wyoming — American workers can’t -beat that.

-
- -

It’s not that simple. These jobs are certainly menial, but they’re not low-skill. As Tim Cook said on 60 Minutes:

- -
-

Charlie Rose: So if it’s not wages, what is it?

- -

Tim Cook: It’s skill. […]

- -

Charlie Rose: They have more skills than American workers? They -have more skills than —

- -

Tim Cook: Now — now, hold on.

- -

Charlie Rose: — German workers?

- -

Tim Cook: Yeah, let me — let me — let me clear, China put an -enormous focus on manufacturing. In what we would call, you and I -would call vocational kind of skills. The U.S., over time, began -to stop having as many vocational kind of skills. I mean, you can -take every tool and die maker in the United States and probably -put them in a room that we’re currently sitting in. In China, you -would have to have multiple football fields.

- -

Charlie Rose: Because they’ve taught those skills in their -schools?

- -

Tim Cook: It’s because it was a focus of them — it’s a focus of -their educational system. And so that is the reality.

-
- -

Wages are a huge factor, but for the sake of argument, let’s say Apple was willing to dip into its massive cash reserves and pay assembly line workers in the U.S. a good wage. Where would these U.S.-made iPhones be assembled? A year ago Apple sold 75 million iPhones in the fourth quarter of calendar 2014. There is no facility in the U.S. that can do that. There might not be anywhere in the world other than China that can operate at that sort of scale. That’s almost one million iPhones per day. 10 iPhones per second. Think about that.

- -

You can say, well, Apple could dig even deeper into its coffers and build such facilities. And train tens of thousands of employees. But why would they? Part of the marvel of Apple’s operations is that they can assemble and sell an unfathomable number of devices but they’re not on the hook for the assembly plants and facilities. When iPhones go the way of the iPod in 10 or 15 or 20 years, Apple doesn’t have any factories to close or convert for other uses. Foxconn does.

- -

The U.S. can’t compete with China on wages. It can’t compete on the size of the labor force. China has had a decades-long push in its education system to train these workers; the U.S. has not. And the U.S. doesn’t have the facilities or the proximity to the Asian component manufacturers.

- -

The only way Apple could ever switch to U.S. assembly and manufacturing would be if they automated the entire process — to build machines that build the machines. That, in fact, is what NeXT did while they were in the hardware business. But NeXT only ever sold about 50,000 computers total. Apple needed to assemble 35,000 iPhones per hour last year.

- -

So long as assembling these devices remains labor intensive, it has to happen in China. And if someday it becomes automated — if the machines are built by machines — by definition it’s not going to create manufacturing jobs.1

- -
-
-
    -
  1. -

    I do wonder about the purported Apple car. Would that be assembled in China, too? The U.S. does have automobile manufacturing expertise. And a car is so utterly unlike any product Apple has ever made that I feel like anything is possible. ↩︎

    -
  2. -
-
- - - - ]]>
- ★ Why Apple Assembles in China
- - - - tag:daringfireball.net,2015://1.31881 - 2015-12-11T21:19:40Z - 2015-12-15T00:38:58Z - - John Gruber - http://daringfireball.net/ - -Regarding Apple’s new Smart Battery Case for the iPhone 6/6S.

-]]>
- Joanna Stern tested Apple’s new Smart Battery Case for five days, and likes it a lot:

- -
-

Let’s get this out of the way: The bar for battery-case design is -extremely low. Most are chunky and made of black matte plastic, -requiring you to attach two pieces to your phone. You choose a -battery case for utility, not fashion.

- -

Apple’s Smart Battery Case, though still fairly unsightly, is -ahead of those. Bend back the top and slide in your phone. It -feels just like Apple’s smooth, soft-touch wraparound silicone -case, except… with a protruding, awkward battery on the back. The -battery juts out as if your phone will soon give birth to a -rectangular alien.

- -

Still, I’ll take it over all the ugly messes sold by Mophie, -Anker and others, especially since it provides better protection -for the phone. A lip curves just above the screen to prevent the -glass from hitting a hard surface and an interior lining provides -better shock absorption than hard plastic. Plus, the grippy -material is much easier to hold and doesn’t feel like it will -slip from my hands.

-
- -

The Verge’s Lauren Goode disagrees:

- -
-

Apple’s smart battery case is fine, then, if you want a softer -case or a “passive” battery charging experience, with zero control -over or understanding of how the case actually charges your phone. -Maybe that’s what Apple is hoping: that buyers of this thing will -slip it on and never take it off, charging their iPhones entirely -through the case’s Lightning port going forward, forgetting about -its big ol’ bump in the back. They will be pleased, finally, with -their iPhone 6’s or 6S’s battery life, and the memory of spending -an extra $99 for it, rather than having it just work that way in -the first place, will eventually fade away.

- -

It’s fine if you don’t want exterior indicator lights, or a even a -case that gives you a 0 to 100 percent charge. After all, this one -was designed for the iPhone, by the same company that made your -iPhone. For some people, that’s a big draw.

- -

In either case this will probably sell like hot cakes. It fits -nicely in holiday stockings. ’Tis the season. Just know that from -a pure performance and even a design perspective, Apple’s effort -is not the best you can get.

-
- -

(I can almost see her eyes rolling as she typed those italicized words in the second quoted paragraph.)

- -

Lewis Hilsenteger of Unbox Therapy best captured what most of us thought when we first saw it: “These things look weird.”

- -

That was certainly my first impression when I got mine Tuesday morning. The looks-like-it’s-pregnant-with-an-iPod-Touch design is certainly curious. I think to understand why it looks like this we have to ask why it even exists:

- -
    -
  • People who use their phones heavily — power users, if you will — struggle to get through a day on a single charge with the iPhone 6/6S.

  • -
  • The Plus models offer so much more battery life that getting through the day on a single charge isn’t a problem, even for power users who are on their phones all day long. But most people don’t want an iPhone that large.

  • -
  • Apple has long sold third-party battery cases in its stores, so they know how popular they are.

  • -
  • Existing battery cases all suffer from similar design problems, as outlined by Joanna Stern above. They make the entire device look and feel chunky, and most of them are built from materials that don’t feel good. None of them integrate in any way with the software on the iPhone, and most of them use micro USB instead of Lightning for charging the case.

  • -
  • Lastly, Apple claims the Smart Battery Case tackles a problem I wasn’t aware existed: that existing battery cases adversely affect cellular reception because they’re putting a battery between the phone’s antenna and the exterior of the case.

  • -
- -

So I think Apple’s priorities for the Smart Battery Case were as follows — and the order matters:

- -
    -
  1. Provides effective battery life equivalent to the iPhone 6S Plus.
  2. -
  3. Feels good in your hand.
  4. -
  5. Makes it easy and elegant to insert and remove the phone.
  6. -
  7. Works as a durable protective case.
  8. -
  9. Prevents the case’s battery from affecting cellular reception.
  10. -
  11. Looks good.
  12. -
- -

That “looks good” is last on the list is unusual for an Apple product, to say the least. Looking good isn’t always first on Apple’s list of priorities, but it’s seldom far from the top. But in this case it makes sense: Apple sells great-looking silicone and leather cases for people who aren’t looking for a battery case, and all existing third-party battery cases are clunky in some way.

- -

Ungainly though the case’s hump is, I can’t help but suspect one reason for it might be, counterintuitively, a certain vanity on the part of its designers. Not for the sake of the case itself, but for the iPhone. Third-party “thick from top to bottom” battery cases make it impossible to tell whether the enclosed phone is itself thick or thin. Apple’s Smart Battery Case makes it obvious that it’s a thin iPhone in a case which has a thick battery on the back. And I’ll say this for Apple: they are owning that hump. The hero photo of the case on the packaging is a face-on view of the back of the case.

- -

But I think the main reasons for this design are practical. The battery doesn’t extend to the top in order to accommodate the hinge design for inserting and removing the phone. Why it doesn’t extend to the bottom is a little less obvious. I suspect one reason is that that’s where the “passively coupling antenna” is.1 Extending the battery to cover it would defeat the purpose. Also, there’s a hand feel aspect to it — normally I rest the bottom of my iPhone on my pinky finger. With this case, I can rest the bottom ridge of the hump on my pinky, and it’s kind of nice. I also like putting my index finger atop the hump.

- -

So the Smart Battery Case looks weird. Typical battery cases look fat. Whether you prefer the weird look of the Smart Battery Case to the fat look of a typical case is subjective. Me, I don’t like the way any of them look. But after using the Smart Battery Case for three days, and having previously spent time using the thinnest available cases from Mophie, I feel confident saying Apple’s Smart Battery Case feels better when you’re holding it than any other battery case, both because of the material and its shape. It’s not even a close call. It also feels sturdier — this is the most protective iPhone case Apple has ever made, with rigid reinforced sides and a slightly higher lip rising above the touchscreen. The Smart Battery Case also clearly looks better from your own face-on perspective when using the phone. (Mophie’s cases look better than most, but they emboss an obnoxious “mophie” logotype on the front-facing chin. If Apple doesn’t print anything on the front face of the iPhone, why in the world would a case maker?)

- -

Patents, by the way, are a non-issue regarding the Smart Battery Case’s design. A well-placed little birdie who is perched in a position to know told me that Nilay Patel’s speculation that the unusual design was the byproduct of Apple trying to steer clear of patents held by Mophie (or any other company for that matter) is “absolute nonsense”. This birdie was unequivocal on the matter. Whether you like it, hate it, or are ambivalent about it, this is the battery case Apple wanted to make.

- -

My take is that the Smart Battery Case is an inelegant design, but it is solving a problem for which, to date, no one has created an elegant solution. Apple has simply chosen to make different severe trade-offs than the existing competition. In that sense, it is a very Apple-like product — like the hockey-puck mouse or the iMac G4.

- -

On Capacity, Simplicity, and the Intended Use Case

- -

Most battery cases have an on/off toggle switch, controlling when the case is actually charging the phone. The reason for this is that you can squeeze more from a battery case if you only charge the phone when it’s mostly depleted. Here’s a passage from Mophie’s FAQ page:

- -
-

When should I turn on my mophie case?

- -

To get the most charge out of your case, turn it on around 10%-20% -and keep the case charging without using it until your iPhone hits -80% battery life. From there, you can either wait until it gets -low again or top it off when the battery is less than 80%. Apple’s -batteries fast-charge to 80%, then switch to trickle charging for -the last 20%.

-
- -

Simplicity is a higher priority for Apple than fiddly control. If a peripheral can get by without an on/off switch, Apple is going to omit the switch. (Exhibit B: Apple Pencil.) The whole point of the Smart Battery Case is that you charge it up and put your iPhone in it and that’s it. Complaining about the lack of an on/off toggle or external charge capacity indicator lights on the Smart Battery Case reminds me of the complaints about the original iPhone omitting the then-ubiquitous green/red hardware buttons for starting and ending phone calls. Sure, there was a purpose to them, but in the end the simplification was worth it. If your iPhone is in the case, it’s charging. That’s it.

- -

Regarding the battery capacity of the case, here’s Lauren Goode, author of the aforelinked review for The Verge, on Twitter:

- -
-

A quick comparison for you: $99 Apple Battery Case 1877 mAh, -$100 Mophie Juice Pack Air 2750 mAh, $50 Incipio Offgrid Express -3000 mAh

-
- -

Nothing could better encapsulate the wrong way of looking at the Smart Battery Case than this tweet. The intended use of the Smart Battery Case is to allow prolonged, heavy use of an iPhone 6/6S throughout one day. In my testing, and judging by the reviews of others, its 1,877 mAh battery is enough for that. Adding a bigger battery would have just made it even heavier and more ungainly.

- -

And the very name of the Incipio Offgrid Express suggests that it is intended for an entirely different use case: traveling away from power for more than a day.

- -

Which in turn brings me to Tim Cook’s comments to Mashable’s Lance Ulanoff yesterday:

- -
-

Some also see the introduction of an Apple battery case as an -admission that battery life on the iPhone 6 and 6s isn’t all it -should be.

- -

Cook, though, said that “if you’re charging your phone every day, -you probably don’t need this at all. But if you’re out hiking and -you go on overnight trips… it’s kind of nice to have.”

-
- -

The Smart Battery Case would certainly help with an overnight hiking trip, but I think Cook was off-message here, because that scenario is really not what it was designed for. Big 5,000 mAh (or more) external battery chargers (or the highest capacity, extremely thick battery cases from third parties) are far better suited to that scenario than the Smart Battery Case. But Ulanoff’s preceding paragraph points to the marketing predicament inherent in a first-party Apple battery case: that it implies the built-in battery of the iPhone 6S is insufficient.

- -

The clear lesson is that it’s far better to give a phone more battery life by making the phone itself thicker and including a correspondingly thicker (and thus bigger) internal battery than by using any sort of external battery. After a few days using this case, my thoughts turn not to the Smart Battery Case itself but instead to my personal desire that Apple had made the 6/6S form factor slightly thicker. Not a lot thicker. Just a little — just enough to boost battery life around 15-20 percent or so.2 That wouldn’t completely alleviate the need for external batteries. But it would eliminate a lot of my need — my phone dies only a few times a year, but when it does, it almost invariably happens very late at night.

- -

I emphasized the word “personal” in the preceding paragraph because I realize my needs and desires are not representative of the majority. I think the battery life of the iPhone 6S as-is is sufficient for the vast majority of typical users. I suspect Cook went with the overnight hiking scenario specifically to avoid the implication that the built-in battery is insufficient. But the better explanation is that the built-in battery is insufficient for power users who use their iPhones far more than most people do.

- -

My Advice

- -

If you find yourself short on battery with your iPhone every day (or even most days), and you can’t make an adjustment to, say, put a charging dock on your desk or in your car to give your iPhone’s internal battery a periodic snack, then you should probably bite the bullet and switch to a 6S Plus. However bulky the Plus feels in your pocket and hands, it feels less bulky to me than the iPhone 6S with any battery pack. An iPhone 6S Plus, even with a normal case on it, weighs noticeably less than an iPhone 6S with the Smart Battery Case. If you need the extra battery capacity every day, you might as well get the Plus. (If you actually prefer the bigger Plus to the 4.7-inch devices, you’re in luck — you get the screen size you prefer, and a significantly longer-lasting battery. My advice here is for those who prefer the 4.7-inch size, other considerations aside.)

- -

That doesn’t describe me, however. On a typical day, my iPhone 6S seldom drops below 20 percent by the time I go to sleep. But when I’m traveling, I often need a portable battery of some sort. Cellular coverage can be spotty (which drains the battery), and when I’m away from home, I tend to do more (or even the entirety) of my daily computing on the iPhone. Conferences, in particular, can be dreadful on battery life. At WWDC my iPhone can drop to 50 percent by the time the keynote is over Monday morning.

- -

In recent years, rather than use a battery case, I’ve switched to carrying a portable external battery. My favorite for the past year or so is the $80 Mophie Powerstation Plus 2X. It’s relatively small, packs a 3,000 mAh capacity, and has built-in USB and Lightning cables. At conferences or for work travel, it’s easily stashed in my laptop bag, so my pockets aren’t weighed down at all, and my iPhone isn’t saddled with an unnatural case. If I do need to carry it in my pocket, it’s not too bad. It’s also easier to share with friends or family than a battery case. At night, I just plug the Powerstation into an AC adapter, and my iPhone into the Powerstation, and both devices get charged — no need for a separate charger or any additional cables.

- -

The big advantage to using a battery case instead of an external battery pack is that you can easily keep using your phone while it charges. That’s awkward, at best, while your phone is tethered by a cable to a small brick.

- -

If I were going to go back to using a battery case, there’s no question in my mind that I’d go with Apple’s. The only downside to it compared to Mophie’s (and the others — but I think Mophie is clearly the leader of the pack) is that it looks funny from the back. But to my eyes it doesn’t look that funny, and though third-party cases don’t look weird, they don’t look (or feel) good. In every other way, Apple’s Smart Battery Case wins: it’s all Lightning, so any Lightning peripherals you have will work, and there’s no need to pack a grody micro USB cable; it supplies more than enough additional power to get you through an active day; its unibody design makes it much easier to insert and remove the phone; and it feels much better in hand.

- -
-
-
    -
  1. -

    My understanding of how this “passively assistive antenna” works is that it takes the cellular signal and amplifies it as it passes through the case in a way that makes it easier for the iPhone’s antenna to “hear”. Sort of like the antenna equivalent of cupping your hand around your ear. I have no idea whether this is legit, or some sort of placebo marketing bullshit, but it would be interesting to see someone measure the cellular reception of (a) a naked iPhone 6S, (b) the same iPhone in a, say, Mophie battery case, and (c) the same iPhone in the Smart Battery Case. ↩︎

    -
  2. -
  3. -

    The iPhone 6 and 6S are actually 0.2mm thinner than their corresponding Plus models. That’s sort of crazy. The difference is barely perceptible, but if anything, the 6 and 6S should be a little thicker, not thinner, than the Plus models. ↩︎︎

    -
  4. -
-
- - - - ]]>
- ★ The Curious Case of the Curious Case
- - - - tag:daringfireball.net,2015://1.31795 - 2015-11-14T04:57:52Z - 2015-12-09T03:58:34Z - - John Gruber - http://daringfireball.net/ - -Take away every single iPhone sold — all of them — and Apple’s remaining business for the quarter was almost as big as Microsoft’s, bigger than Google’s, and more than four times the size of Facebook’s.

-]]>
- This piece by Bryan Clark for TheNextWeb caught my eye last weekend — “We’ve Reached — Maybe Passed — Peak Apple: Why the Narrative Needs to Change”:

- -
-

Last month, Apple’s latest earnings call announced its “most -successful year ever.” The numbers were reported, the stories -were spun and Wall Street basically anointed Apple the god of -capitalism.

- -

They’re all wrong.

-
- -

Apple wasn’t wrong — fiscal 2015 was Apple’s most successful year ever, by the objective measures of both revenue and profit. I suppose you can decide to define “most successful year ever” in terms of something else, like percentage growth or stock price gains, but revenue and profit are pretty fair measures.

- -

I missed it where “Wall Street basically anointed Apple the god of capitalism”. All I noticed was that Apple’s stock price went up about two percent the day after earnings were announced and has since fallen back to where it was before Q4 earnings were announced.

- -
-

The actual story, the story we should be telling, involves a -different narrative. Apple is the largest company in the world, -but success is fleeting. While the numbers are impressive, they -don’t come close to painting an accurate picture about how much -trouble Apple is really in.

- -

Apple’s rise under Steve Jobs was historic. Its fall under Tim -Cook is going to be much slower, more painful.

-
- -

The fall usually is more painful than the rise. Who writes a sentence like that?

- -

And if Apple’s fall under Cook is much slower than its rise under Steve Jobs, it’s going to take 20 or 30 years. Apple’s revival was long, slow, and relatively steady.

- -
-

Apple lives and dies by the iPhone. iPad sales are flat, -iPod’s are all but irrelevant, and while Mac sales are up, -they’re nowhere close to the workhorse that can continue to -carry Apple should they experience a downturn in iPhone sales. -There is no Plan B.

- -

One look at the numbers tells a pretty decisive tale.

- -

Percentage of revenue derived from iPhone sales:

- -
    -
  • 2012: 46.38%
  • -
  • 2013: 52.07%
  • -
  • 2014: 56.21%
  • -
  • 2015: 62.54%
  • -
-
- -

This is the part of Clark’s piece that got my attention. It’s a common refrain these days — just search Google for “Apple is too dependent on the iPhone”.

- -

Clark makes it sound like this is because the rest of Apple’s business is in decline, whereas the truth is that the iPhone continues to grow at an astonishing rate that even Apple’s other successful products can’t match. Is it worrisome that iPad sales continue to decline? Sure. Would it be better for Apple if the iPad were selling in iPhone-esque quantities? Of course. But iPad still sold 9.9 million units and generated $4.3 billion in revenue last quarter.

- -

Arguing that Apple is in trouble because the iPhone is so popular is like arguing that the ’90s-era Chicago Bulls were in trouble because Michael Jordan was so good. It’s true Jordan couldn’t play forever — and the iPhone won’t be the most profitable product in the world forever. But in the meantime, the Bulls were well-nigh unbeatable, and Apple, for now at least, is unfathomably profitable.[1] Just like how it’s better to have loved and lost than never to have loved at all, it’s better to have tremendous success for some period of time than never to have had tremendous success in the first place. Right?

- -

What I don’t get is why Apple gets singled out for its singular success, but other companies don’t. 92 percent of Google’s revenue last year came from online advertising. And more importantly, I don’t get why Apple’s non-iPhone businesses are so quickly written off only because they’re so much smaller than the iPhone.

- -

Apple’s total revenue for last quarter was $51.5 billion. The iPhone accounted for $32.2 billion of that, which means Apple’s non-iPhone business generated about $19.3 billion in revenue. All of Microsoft in the same three months: around $21 billion. All of Google: $18.78 billion. Facebook: $4.5 billion. Take away every single iPhone sold — all of them — and Apple’s remaining business for the quarter was almost as big as Microsoft’s, bigger than Google’s, and more than four times the size of Facebook’s. And this is for the July-September quarter, not the October-December holiday quarter in which Apple is strongest.

- -

Nothing in the world compares to Apple’s iPhone business, including anything else Apple makes. But a multi-billion-per-quarter business here (Mac), a multi-billion-per-quarter business there (iPad), a “Services” division that generates more revenue than Facebook, and an “Other” category (Watch, Apple TV, Beats, iPod) that booked $3 billion in a non-holiday quarter — and it’s clear that Apple’s non-iPhone businesses, combined, amount to a massive enterprise.

- -

Here’s a Larry Dignon column about whether iPad Pro will make “iPad material to Apple again”:

- -
-

Apple’s iPad sales are on the borderline of being immaterial to -the company, but some analysts are betting that enterprise sales -of the iPad Pro can turn the product line around. […]

- -

Nevertheless, the iPad franchise is sucking wind relative to the -iPhone. Apple’s annual report shows the iPad is 10 percent of -overall sales. Once a business falls below 10 percent a company -doesn’t have to break it out. In other words, the iPad could be -lumped into “other” with the Apple Watch and iPod if current -trends continue.

-
- -

This is a product line that, in and of itself, generated just about exactly the same revenue last quarter as all of Google’s non-advertising business did for the entire fiscal year. But Apple is the company that is considered lopsided and worrisomely dependent upon a single product.

- -

Name a product introduced in the last five years that has been more successful than the iPad — either in terms of revenue and profit for its maker, or in terms of aggregate hours of daily use and customer satisfaction of its users. I can’t think of one.

- -

Now consider the Apple Watch. Fast Company called it “a flop” back in July. Here’s a guy on Quora — Jason Lancaster, editor of a website called Accurate Auto Advice — answering, in the affirmative, whether Apple has “already lost the market for self driving cars” (not joking):

- -
-

Third, Apple may have peaked. Call me a hater, but what reason is -there to assume Apple’s reputation is going to stay where it is? -The watch was a flop, and their only consistent source of success -is the iPhone, as the market for Macs and iPads is drying up (as -it is for all computer hardware companies).

-
- -

Forget the fact that Mac sales are growing, or that iPad sales, though in decline, remain roughly 10 million per quarter. What I enjoy about this is Lancaster’s having written off the Watch as a flop — he even uses the past tense.

- -

Here’s what that flop looks like:

- -
-

Apple has shipped seven million Apple Watches since its -introduction this spring, giving the technology giant a firm lead -in the nascent smartwatch market, according to researcher Canalys.

- -

That number falls shy of some Wall Street analysts’ expectations -for Apple’s first new device category since 2010. But, for -perspective, consider this: Apple sold more smartwatches from -April through September than all other vendors combined sold over -the past five quarters, Canalys reports.

-
- -

If we estimate the average selling price for an Apple Watch at $500 (reasonable), that’s $3.5 billion in revenue for the year to date — prior to the holiday quarter that is almost certainly going to be the strongest for watch sales annually.

- -
- -

Back to Bryan Clark’s TheNextWeb piece:

- -
-

Steve Jobs is almost entirely responsible for Apple’s cult-like -following.

- -

By streamlining the company in an attempt to make it profitable, -the same vision started to makes its way through every product -Apple created. Rather than bloated and flashy, Jobs created a -movement of decidedly minimalist devices that required not much -more than an occasional charge and a user that knew where the -power button was.

- -

Between aesthetically pleasing design, rock-solid hardware, and -software that responded as if it were built for the machine — -not in spite of it — Apple culture became a cult of -Jobs-worshipping consumers willing to buy anything with a -lowercase “i” in front of it.

-
- -

That never happened. The G4 Cube didn’t sell. iPod Hi-Fi didn’t sell. Those weren’t just non-hit products — they were both products that Steve Jobs himself really liked. I’ve heard that he had a stack of unopened iPod Hi-Fis in his office. Apple products have never been blindly accepted by the mass market — they’ve succeeded on their merits and by meeting actual demand. As I wrote two years ago:

- -
-

To posit that Apple customers are somehow different, that when -they feel screwed by Apple their response is to go back for more, -is “Cult of Mac” logic — the supposition that most Apple -customers are irrational zealots or trend followers who just -mindlessly buy anything with an Apple logo on it. The truth is the -opposite: Apple’s business is making customers happy, and keeping -them happy. They make products for discriminating people who have -higher standards and less tolerance for design flaws or -problems.

-
- -

Clark finally tells us what Apple’s biggest problems are:

- -
-

There are larger issues on the horizon: For example, how does -Apple compete with Windows and Android?

- -

Both have proven to be amazingly adept in recent years not only at -competing with Apple in form factor, but functionality as well.

- -

Two companies that are innovating, not searching for identity -outside of a singular product.

- -

Two companies that are on the way up, not down.

-
- -

Windows and Android, got it.

- -
-

The Apple Watch is great, but it’s never going to carry Apple like -the iPhone until it works like one. The watch is undeniably cool, -but it really fails to do anything better than your phone.

- -

To make matters worse, you have to have an iPhone close by in -order to even use most of its features. Similar Android models are -self-contained and only require an occasional sync.

- -

The autonomous car project sounds promising, but competing against -Google and Tesla in addition to auto industry giants like Lexus -and Mercedes is an uphill battle full of technology challenges, -government red tape and changing century-old transportation -conventions.

-
- -

The best I can gather from this mishmash of a conclusion is that Apple Watch should have somehow debuted as a first-generation product that could stand toe-to-toe with the iPhone (which is now in its ninth generation), and that Apple’s car product should already be here. If there were no rumors of an Apple car, we’d be hearing that Apple is going to miss out on the next big industry that is ripe for disruption from the tech industry. But because there are rumors and hints pointing to an Apple car, we’re hearing that cars are too difficult, the established companies too entrenched. Ed Colligan’s line for the ages — “PC guys are not going to just figure this out. They’re not going to just walk in.” — was also about an industry full of longstanding giants, Google, technology challenges, government red tape, and century-old conventions. Minus the “government red tape”, that’s a pretty good description of the watch and home entertainment system industries, too.

- -

I’m not here to argue the opposite of Colligan — that Apple’s success in these new fields is preordained — because that would be foolish. But it’s just as foolish to argue that Apple can’t succeed — or that anything less than iPhone-sized success in a new endeavor is a failure.

- -
-
-
    -
  1. -

    The iPhone, however, is unlikely to take a year off in the prime of its career to play baseball. ↩︎

    -
  2. -
-
- - - - ]]>
- ★ What Goes Up
- - - - tag:daringfireball.net,2015://1.31778 - 2015-11-11T13:08:58Z - 2015-11-13T08:05:24Z - - John Gruber - http://daringfireball.net/ - -The future of mass market portable computing involves neither a mouse pointer nor an x86 processor.

-]]>
- First impressions last a lifetime, goes the adage. You’re going to have to forget your first impressions of the iPad to understand the iPad Pro.

- -

When Apple introduced the original iPad in 2010, it was explicitly positioned in a new role for a device — somewhere between an iPhone and a MacBook. That seems obvious, but the problem, for the iPad, is that people loved their iPhones and MacBooks. The only way iPad would succeed, Steve Jobs said, was if it were “far better at doing some key things” than either an iPhone or MacBook.

- -

Apple succeeded. Simply by nature of having a bigger display, the iPad was better than the iPhone for numerous tasks — watching videos or reading long-form text, to name just two. No one would dispute that bigger displays are better for certain tasks — you can prove the productivity gains.

- -

What made the iPad better than a MacBook, in at least some ways, was more subjective than objective. Objectively, a MacBook was faster, by a large factor, could multitask, and offered a rich library of serious productivity apps. A Mac was, simply put, more powerful than an iPad — both in terms of hardware and software. The iPad had some objective advantages — battery life and the pixel density of its display are two that come to mind.[1]

- -

The trade-offs were obvious. The iPad offered the same conceptual simplicity and intimacy as the iPhone, with the “lean-back” ergonomics of a tablet, at the cost of power — hardware performance and software complexity.

- -

It was, in short, just a big iPhone. To the eyes of many in the tech industry, “just a big iPhone” was damning. They wanted the iPad to impress in terms of power. To the eyes of tens of millions of users, however, “just a big iPhone” was strong praise. An iPhone with a 10-inch display sounded just great.

- -

The intervening five years have turned all of this upside down. The iPad Pro now impresses solely by dint of its engineering. Anyone who doesn’t see this is blinded by their established impressions of the first few iPads.

- -

For the moment, put aside the form factor differences (tablet with optional keyboard vs. hinged clamshell), conceptual differences in iOS and OS X (direct touchscreen manipulation of full-screen apps vs. a mouse pointer and tiled windows) and software differences (simpler iOS apps vs. more complex OS X apps). All those points are worth consideration, but for now, put them aside. Right now, today, the iPad Pro is a peer to the current lineup of MacBooks in terms of computational hardware performance.

- -

The iPad Pro is without question faster than the new one-port MacBook or the latest MacBook Airs. I’ve looked at several of my favorite benchmarks — Geekbench 3, Mozilla’s Kraken, and Google’s Octane 2 — and the iPad Pro is a race car. It’s only a hair slower than my year-old 13-inch MacBook Pro in single-core measurements. Graphics-wise, testing with GFXBench, it blows my MacBook Pro away. A one-year-old maxed-out MacBook Pro, rivaled by an iPad in performance benchmarks. Just think about that. According to Geekbench’s online results, the iPad Pro is faster in single-core testing than Microsoft’s new Surface Pro 4 with a Core-i5 processor. The Core-i7 version of the Surface Pro 4 isn’t shipping until December — that model will almost certainly test faster than the iPad Pro. But that’s a $1599 machine with an Intel x86 CPU. The iPad Pro starts at $799 and runs an ARM CPU — Apple’s A9X. There is no more trade-off. You don’t have to choose between the performance of x86 and the battery life of ARM.

- -

We’ve now reached an inflection point. The new MacBook is slower, gets worse battery life, and even its cheapest configuration costs $200 more than the top-of-the-line iPad Pro. The iPad Pro is more powerful, cheaper, has a better display, and gets better battery life. It’s not a clear cut-and-dry win — MacBooks still have more RAM (the iPad Pro, in all configurations, has 4 GB of RAM, although Apple still isn’t publishing this information — MacBook Pros have either 8 or 16 GB), are expandable, and offer far more storage. But at a fundamental level — CPU speed, GPU speed, quality of the display, quality of the sound output, and overall responsiveness of interface — the iPad Pro is a better computer than a MacBook or MacBook Air, and a worthy rival to the far more expensive MacBook Pros.

- -

The entire x86 computer architecture is living on borrowed time. It’s a dead platform walking. The future belongs to ARM, and Apple’s A-series SoCs are leading the way.

- -

The A9X didn’t come out of nowhere. Watching Apple’s A-series chips gain on x86 over the past five years, we’ve all been speculating about whether Apple might someday start using ARM chips in MacBooks. As of now, it’s only a question of whether they want to.

- -

What Apple Means by ‘Pro’

- -

With the Mac Pro, the “pro” really does stand for “professional”. There’s pretty much no reason for anyone to buy a Mac Pro unless their work is computationally expensive. There aren’t many people left whose work is slowed down regularly by the performance of their computer. The Mac Pro is aimed at that market. (That said, a higher-end iMac will outperform a Mac Pro in many tasks that aren’t well-suited to multicore parallel computing. The Mac Pro is due for an update.)

- -

With the MacBook Pro, on the other hand, “pro” isn’t really short for “professional”. It’s more like “deluxe” — a signifier that it’s a higher-end product than its non-pro siblings. Faster, better, and accordingly higher-priced. A MacBook Pro with 1 TB of SSD storage is indeed a terrific portable computer for “professional” use by, say, a photographer or film editor or software developer — people who truly stretch the performance of any computer today, portable or otherwise. But a decked-out MacBook Pro is also a terrific and perfectly reasonable choice for anyone who can simply afford one. MacBook Airs don’t have retina displays (and likely will never be upgraded to offer them), and the one-port MacBook is relatively slow.

- -

The iPad Pro is “pro” in the way MacBook Pros are. Genuine professionals with a professional need — visual artists in particular — are going to line up for them. But it’s also a perfectly reasonable choice for casual iPad users who just want a bigger display, louder (and now stereo) speakers, and faster performance.

- -

Anyone tying themselves in knots looking for a specific target audience for the iPad Pro is going about it the wrong way. There is no single target audience. Is the iPad Pro meant for office workers in the enterprise? Professional artists creating content? Casual users playing games, watching movies, and reading? The answer is simply “Yes”.

- -

Smart Keyboard and Converting to a Laptop Form Factor

- -

So unlike the original iPad of 2010, which carved out new territory between that of an iPhone and MacBook, the iPad Pro is clearly an alternative to a MacBook. I’m sure someone out there will carry both a MacBook (of any sort) and an iPad Pro while traveling, but I don’t really see the sense of that. The iPad Mini makes perfect sense as a travel companion to a MacBook. The iPad Air does too — especially for someone who watches a lot of video or prefers larger type while reading. But the iPad Pro stands as an alternative to a MacBook. If you want to carry a MacBook, you want a smaller, lighter iPad as a companion, and you don’t need a keyboard for it. If you want to carry an iPad Pro, you might as well get the Smart Keyboard cover and leave the MacBook at home.

- -

The trade-offs are varied. If you don’t type much, or don’t mind using the on-screen keyboard when you do, you’re probably already sold on the iPad-as-primary-portable-computer lifestyle. If you do type a lot and want a hardware keyboard, the appeal of the iPad Pro is going to largely hinge on your affinity for the Smart Keyboard.

- -

I’ve been using this iPad Pro review unit (128 GB, with cellular — top of the line kit, natch) for eight days, and most of that time I’ve had the Smart Keyboard attached. For just plain typing, it’s not that bad — I’ve written this entire review using it, Federico Viticci-style. I went into it thinking that my biggest complaint would be the keys themselves — I like my keyboards clicky, with a lot of travel. But I adjusted to it pretty quickly, and I kind of like the way it feels, as a tactile surface. It almost feels like canvas.

- -

My complaints and frustrations are more from the software, both iOS 9.1 itself and individual apps, both from Apple and third-party developers. Trying to use the iPad Pro as a laptop with the Smart Keyboard exposes the seams of an OS that was clearly designed for touchscreen use first. These seams aren’t new — I’m sure anyone who has tried using an iPad of any sort with a paired Bluetooth keyboard has run into the same things. This is simply the first time I’ve tried using an iPad with a hardware keyboard for an extended period for large amounts of work.

- -

I almost wrote “for large amounts of writing” in the preceding paragraph, but the problems with an iPad and a hardware keyboard are more than about typing. A large part of my work is reading, and with a laptop, the keyboard is a big part of the reading experience. In fact, with the iPad Pro, the keyboard is even more important than it is on a MacBook — and today, it falls short.

- -

Here’s what I mean. First, when the iPad Pro is open with the keyboard attached, holding your arm up to touch the screen for anything longer than a moment or two is ergonomically uncomfortable. Apple has stated for years that this is why they don’t make the displays on MacBooks or iMacs touchscreens (that, combined with the relatively tiny click targets of Mac OS X, which are designed for very precise mice and trackpads, not imprecise finger tips). Scrolling through a long document using the iPad Pro touch screen is uncomfortable when it’s in laptop position. Going through a slew of new emails, likewise. In laptop mode, I want to use the keyboard for these things — and in most cases, because of bugs and/or software limitations, I can’t. That the keyboard falls short in these cases is even worse on iPad than it would be on a MacBook, because a MacBook has a trackpad. The point is, if my fingers are on the keyboard, I don’t want to move my hands. With a trackpad, I don’t have to. With the iPad Pro, I do.

- -

It’s an ancient (meaning dating back to the Classic era) Mac convention that in a read-only scrolling view, you can use the space bar to page down. When your eyes get to the bottom of the display, you can just hit space and the view should scroll to show the next screen full of content — with the last line or two of the previous screen now repeated at the top of the new screen to provide context as your eyes move from the bottom to the top of the display. This works almost everywhere on OS X, and anywhere it doesn’t work should be considered a bug.

- -

On iOS 9.1, Safari tries to support this, but it is dreadfully buggy. Instead of paging down just less than one screen-height of content, it pages down about 1.5 screen-heights of content. It literally scrolls right past huge amounts of content, rendering the feature completely unusable.

- -

Here’s a sample page I’ve created to illustrate. It’s just a simple text file with 1,000 lines, numbered in order. When I view that on my MacBook Pro, I see lines 1–45 (and half of line 46). When I hit space to page, the view scrolls and I now see lines 44–89. Hit space again and the view scrolls to show lines 88–132.

- -

On iPad Pro, I see lines 1–49 initially. But when I hit space to page down, the view scrolls to show me lines 75–123. Lines 50–74 are completely skipped past. It’s not even just a line or two — it’s huge chunks of text. This happens in all web pages in Safari on iOS 9.1, and it is not specific to the iPad Pro and Smart Keyboard. I see the exact same behavior on any iPad with a paired Bluetooth keyboard.

- -

Mail is another app in which, on my Macs, I depend heavily on the keyboard for scrolling and selection. On iPad, Mail does let you move from message to message using the keyboard (⌘↓ and ⌘↑), but it doesn’t support scrolling the actual message content — the space bar does nothing, and the Smart Keyboard doesn’t have a proper Page Down key.

- -

The space bar doesn’t work as a Play/Pause toggle for audio or video, either. I think it should.

- -

I don’t think it’s inherently problematic that iOS has no conceptual support for a mouse pointer, and thus can’t work with any sort of trackpad. But, given this constraint, good support for navigating as much of the UI as possible using the keyboard is more important on the iPad than it is on the Mac. But iOS’s support for navigating using the keyboard is worse.

- -

Another problem: when editing a long document, if you use the arrow keys to move the insertion point above the first line on screen or below the last line on screen, the insertion point just disappears off screen. The view doesn’t scroll to keep the insertion point visible, which is clearly what should happen (and does happen on OS X). Surely iOS will work this way eventually, but right now it still shows its roots as a touchscreen OS where a hardware keyboard is a decided second-class citizen.

- -

All is not lost, however. ⌘-Tab works for app switching just like it does on the Mac. Tap it and release and you switch to the most-recently used app. Tap it and keep holding down ⌘ and you get a visual switcher showing the 10 most-recently-used apps. (Again, this works with any hardware keyboard connected to any iPad — it’s just that this has been the first time it’s been relevant to me, personally.) The Smart Keyboard lacks a Home button, but there is a system-wide shortcut that maps ⌘-Shift-H to “Home”. Not bad, but once you’re at the iOS home screen, there’s not much you can do without touching the screen. For a few days, I sort of wished that I could use the arrow keys to navigate the home screen, with the selected app icon popping “up” like in the “focus” UI of the new Apple TV. But that idea, I suspect, is too far afield from the regular touchscreen-based UI of the iOS home screen. My keyboard idea requires a select-then-act two-stage model — the regular touch-based launcher is single-stage: just tap.

- -

But then I realized that the problem I wanted to solve wasn’t that I wanted the home screen to be keyboard-navigable. The problem was that I wanted to use the keyboard to launch apps that weren’t listed in the ⌘-Tab switcher. To do that on iOS without a hardware keyboard, you go home, then tap the app. With a keyboard, though, you can do it, just in a different way.

- -

Hit ⌘-Space system wide, and you’ll be taken to the home screen’s system-wide “Quick Search”. It’s like the iOS equivalent of Spotlight. Start typing the name of the app you want to launch, and there it is.

- -

But go ahead and play a sad trombone wah-wah here, because at this point, you still have to pick your arm up and touch the screen to launch the app. You can also use Quick Search for starting a web search in Safari, or anything else. But you can’t use the keyboard arrow keys to navigate the list of results. (Another problem with Quick Search using the keyboard: you have to wait a second or so for the Quick Search text field to accept input. I’m pretty sure it’s because we’re waiting for the animation to complete — first to show the home screen, then to jump to Quick Search. So if you type ⌘-Space and immediately begin typing what you’re looking for, the first few characters you type are lost. The user should never have to wait for the computer, especially if it’s just for an animation. Any Mac user with muscle memory trained by LaunchBar, Alfred, Quicksilver, or even Spotlight is going to find this enforced delay on iOS maddening.)

- -

This lack of keyboard support is prevalent system-wide. In Messages, if you start a new conversation and type the partial name of a contact, you can’t select from the list of matches using arrow keys or auto-complete the name you’ve partially typed using Tab. You’ve got to — you guessed it — reach up and touch the screen. You can use the arrow keys to select from a list of suggestions in the recipients fields in Mail, however, and arrow keys also work for selecting from the list of suggestions in the Safari location field.

- -

The bottom line is that the potential of the iPad Pro as a laptop is tremendous. The keyboard is just fine for typing, and the magnetic connection between the iPad Pro and the keyboard is surprisingly sturdy. You can absolutely use it as a literal laptop without any worry that the iPad Pro is going to fall off the Smart Keyboard. I even like the 4:3 aspect ratio — it shows more lines of text when reading than my 13-inch MacBook Pro. It also occupies a smaller footprint than an open MacBook Pro, meaning it should fit better on the seatback tray of an airplane. But the lack of pervasive support for keyboard-based UI navigation in iOS is a problem for anyone with longstanding Mac keyboard shortcuts ingrained in their muscle memory.

- -

As an actual cover, the Smart Keyboard does feel thick, and when closed, it bothers me a little that it’s thicker on the outer two thirds (where the keyboard is folded under) than the inner third. I wouldn’t recommend the Smart Keyboard for anyone who doesn’t plan to actually use the keyboard quite a bit. But if you do plan on using the keyboard frequently, the trade-off in thickness (compared to the non-keyboard Smart Cover) is well worth it.

- -

(It occurs to me that for many people, the Smart Keyboard might best be thought of not as a thick cover, but as a thin, very portable desktop docking station.)

- -

Keyboard Bugs

- -

I experienced some flakiness with the keyboard throughout the week. Sometimes, system-wide keyboard shortcuts would stop working: ⌘-Tab, ⌘-Space, and ⌘-Shift-H. Typing within apps still worked, and keyboard shortcuts within any given app still worked, but the system-wide shortcuts inexplicably stopped working.

- -

Less frequently, I’ve seen the opposite problem: the system-wide keyboard shortcuts work, but keyboard shortcuts within any given app stop working. (iOS 9 has a very clever feature, by the way: press and hold the ⌘ key and you’ll see a HUD pop-up displaying all keyboard shortcuts available in the current context. This makes keyboard shortcuts more discoverable than they are on the Mac, where they’re spread across multiple menus in the menu bar.)

- -

In either case, I’ve been able to fix these keyboard problems by detaching and re-attaching the iPad from the Smart Keyboard. I don’t know if it’s a bug in iOS 9.1 or a faulty Smart Keyboard. (Apple has shipped me a second Smart Keyboard to test, but it won’t arrive until later in the day, after this review has been published. I’ll update it after the replacement arrives.)

- -

Apple Pencil

- -

It’s about precision: accuracy where you touch (Apple claims sub-pixel precision on screen), accuracy regarding pressure, and low latency regarding what you see on screen. I am not an illustrator, but I do know my own signature. My signature never looks like my actual signature when I have to sign electronically on a point-of-sale terminal. Usually it doesn’t even look close. On iPad Pro with Apple Pencil, it looks exactly like my signature when I sign with paper and ink. My handwriting looks like my handwriting, period (for better or for worse).

- -

All previous iOS devices have touchscreens designed for input from one source: fingertips. Fingertips are relatively fat and capacitive. The relatively fat size and imprecise location of a finger on screen is why tap targets are relatively larger and more spaced apart on iOS than OS X. This is also why third-party styluses for iOS devices have broad tips made of capacitive rubber — they’re more or less fake fingertips. The capacitive touchscreens on iPhones and (non-Pro) iPads aren’t designed for “fine tips”.

- -

Apple has done a few things regarding sampling the screen for input with Apple Pencil. First, there is something new in the display itself — something in the layer between the glass surface and the LCD display, I think. Or perhaps it’s under the LCD? Apple alludes to it in the Jony Ive-narrated video on the Apple Pencil web page, but they’re not really talking about it in detail.

- -

For capacitive (finger) touch, the iPad Pro samples at twice the rate of previous iPads — 120 times per second instead of 60. With the Pencil, though, the iPad Pro samples 240 times per second. The way the Pencil works requires cooperation with the display, and so there’s no way this Pencil could be made to work with existing iPads. The Pencil is not iPad Pro-exclusive out of product marketing spite — it’s exclusive to the Pro because the two were engineered in coordination with each other. And if Apple had designed the Pencil differently, to allow it to work with existing iPads, there’s no way it could have had this level of accuracy, because the tip would have needed to be broader and capacitive. (The Pencil’s tip is not capacitive at all — it doesn’t register as a touch at all on any other iOS device.)

- -

My guess is we’ll start to see Pencil support in future iOS devices in addition to the iPad Pro, starting with the iPad Air 3.

- -

Because the Pencil is round-barreled and has no clip on the cap, I was worried that it would roll around (and eventually, off) a table top. But it’s actually weighted inside, sort of like a Weeble Wobble, so unless it’s on a sloped surface, it won’t roll more than an inch or so before settling in place. In hand, I can’t tell that it’s weighted like this.

- -

I think most people who buy an iPad Pro are going to want a Smart Keyboard. The Apple Pencil is the more technically remarkable peripheral, but I suspect it’ll prove useful to far fewer people. Sketching apps like 53’s Paper and Apple’s own built-in Notes app certainly have appeal and utility to people who aren’t artists, but I suspect a lot of Apple Pencils are going to be bought out of curiosity and then go largely unused.

- -

For actual illustrators and artists, however, the Pencil and iPad Pro seem poised to be a career/industry-changing combination. What has been largely abstract — drawing using a device over here, looking at the results on a screen over there — can now be direct.

- -

Miscellaneous

- -
    -
  • Weight: The iPad Pro certainly feels heavier than recent iPads, but only in a way that’s commensurate with its increased size. It’s not too heavy.

  • -
  • Audio: The speakers are surprisingly loud. Apple told me the iPad Pro produces three times the audio volume of the iPad Air, and that certainly matches my experience. If you use your iPad as a small TV, the audio improvements might be more meaningful than the bigger display. The four-speaker stereo system is also very clever — no matter which way you rotate the iPad Pro, the top two speakers are for treble and the bottom two for bass.

  • -
  • Snap: Speaking of audio, if there’s a downside to the snug connection between the iPad Pro and the Smart Keyboard, it’s that the magnetic connection makes a rather loud snap when you connect or disconnect it. I can imagine some scenarios — in bed with a sleeping spouse, say — where this might be a problem.

  • -
  • Size classes: I think even Apple’s own apps are still figuring out how best to arrange layouts on this larger display. For example, in Mail, when the iPad Pro is in portrait, it only shows one column at a time. I think there’s clearly enough room horizontally, even in portrait, for a two-pane layout (narrow list of messages on left, wide message detail on right). The iPad Pro in portrait is as wide as the iPad Air in landscape — and the iPad Air in landscape uses two panes for Mail. Third-party developers are going to want to adjust their apps after they get a feel for what it’s like to use the iPad Pro for real.

  • -
  • Battery life: Simply outstanding. I didn’t even plug it in once between Monday and Friday, and it still had plenty of charge left. I’ve been using it for eight continuous hours as I type this sentence, and it still has more than a 50 percent charge remaining.

  • -
  • Missing apps: It’s been like this ever since the original iPad, but it still strikes me as odd that the iPad version of iOS lacks the Calculator, Weather, and Stocks apps. The Mac doesn’t have “apps” for Weather or Stocks, but it does have widgets for them in Notification Center. And it seems downright crazy for a computer not to have a built-in means for doing arithmetic. (Although you can do some arithmetic using Quick Search.)

  • -
  • Touch, Don’t Touch: For the past week I’ve really only used two computers. The iMac on my desk, and this iPad Pro. Today, though, I used my MacBook Pro while the iPad Pro was running benchmarks. And within a few minutes, I did something I have never once done before: I reached up and tried to touch something on the display. Ten minutes later I did it again. I point this out not to argue that I think MacBooks should have touch screens, but simply as an observation that even a lifelong Mac user can quickly get accustomed to the iPad Pro as a laptop.

  • -
- -

Conclusion

- -

From a hardware perspective, the iPad Pro strikes me as a seminal device. It runs faster than the Intel x86-based MacBooks, gets better battery life, and costs significantly less. And it has a better display with significantly more pixels than even a 15-inch MacBook Pro.

- -

Software-wise, support for the Smart Keyboard needs to get even smarter — but I’d be shocked if it doesn’t. For me, the iPad Pro marks the turning point where iPads are no longer merely lightweight (both physically and conceptually) alternatives to MacBooks for use in simple scenarios, to where MacBooks will now start being seen as heavyweight alternatives to iPads for complex scenarios.[2]

- -

Is it a MacBook replacement for me, personally? No. For you? Maybe. For many people? Yes.

- -

It brings me no joy to observe this, but the future of mass market portable computing involves neither a mouse pointer nor an x86 processor.

- -
-
-
    -
  1. -

    It’s kind of funny to think of a 2010 iPad with its 133 PPI display as “high resolution” — such a display looks comically fuzzy by today’s standards. But at the time it was a noticeably sharper display than what was in the MacBooks of the day — a 2009 13-inch MacBook Pro had a display with 113 PPI resolution. ↩︎

    -
  2. -
  3. -

    iOS 9’s split-screen multitasking really shines on the iPad Pro. I’ve found it useful on my iPad Air, but it’s downright natural on the iPad Pro. ↩︎︎

    -
  4. - -
-
- - - - ]]>
- ★ The iPad Pro
diff --git a/Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift b/Modules/Parser/Tests/ParserTests/AtomParserTests.swift similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/AtomParserTests.swift rename to Modules/Parser/Tests/ParserTests/AtomParserTests.swift diff --git a/Modules/Parser/Tests/DateParserTests/DateParserTests.swift b/Modules/Parser/Tests/ParserTests/DateParserTests.swift similarity index 100% rename from Modules/Parser/Tests/DateParserTests/DateParserTests.swift rename to Modules/Parser/Tests/ParserTests/DateParserTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/EntityDecodingTests.swift rename to Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift similarity index 94% rename from Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift rename to Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift index c17f3ee8d..8c76c3f89 100644 --- a/Modules/Parser/Tests/FeedParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift @@ -235,10 +235,3 @@ class FeedParserTypeTests: XCTestCase { } } } - -func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { - let filename = "Resources/\(filename)" - let path = Bundle.module.path(forResource: filename, ofType: fileExtension)! - let data = try! 
Data(contentsOf: URL(fileURLWithPath: path)) - return ParserData(url: url, data: data) -} diff --git a/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift similarity index 75% rename from Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift rename to Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift index 97d52e4cb..c179b8137 100644 --- a/Modules/Parser/Tests/HTMLParserTests/HTMLLinkTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift @@ -41,10 +41,3 @@ class HTMLLinkTests: XCTestCase { XCTAssertEqual(links.count, 131) } } - -func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { - let filename = "Resources/\(filename)" - let path = Bundle.module.path(forResource: filename, ofType: fileExtension)! - let data = try! Data(contentsOf: URL(fileURLWithPath: path)) - return ParserData(url: url, data: data) -} diff --git a/Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift similarity index 100% rename from Modules/Parser/Tests/HTMLParserTests/HTMLMetadataTests.swift rename to Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/Info.plist b/Modules/Parser/Tests/ParserTests/Info.plist similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Info.plist rename to Modules/Parser/Tests/ParserTests/Info.plist diff --git a/Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift b/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/JSONFeedParserTests.swift rename to Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift diff --git a/Modules/Parser/Tests/OPMLParserTests/OPMLTests.swift b/Modules/Parser/Tests/ParserTests/OPMLTests.swift similarity index 100% rename from Modules/Parser/Tests/OPMLParserTests/OPMLTests.swift rename to 
Modules/Parser/Tests/ParserTests/OPMLTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/ParserTests.swift b/Modules/Parser/Tests/ParserTests/ParserTests.swift similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/ParserTests.swift rename to Modules/Parser/Tests/ParserTests/ParserTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/RSSInJSONParserTests.swift rename to Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSParserTests.swift similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/RSSParserTests.swift rename to Modules/Parser/Tests/ParserTests/RSSParserTests.swift diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/3960.json b/Modules/Parser/Tests/ParserTests/Resources/3960.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/3960.json rename to Modules/Parser/Tests/ParserTests/Resources/3960.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/489.rss b/Modules/Parser/Tests/ParserTests/Resources/489.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/489.rss rename to Modules/Parser/Tests/ParserTests/Resources/489.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/4fsodonline.atom b/Modules/Parser/Tests/ParserTests/Resources/4fsodonline.atom similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/4fsodonline.atom rename to Modules/Parser/Tests/ParserTests/Resources/4fsodonline.atom diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.atom b/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.atom similarity index 100% rename from 
Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.atom rename to Modules/Parser/Tests/ParserTests/Resources/DaringFireball.atom diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.html b/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.html similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.html rename to Modules/Parser/Tests/ParserTests/Resources/DaringFireball.html diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.json b/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.json rename to Modules/Parser/Tests/ParserTests/Resources/DaringFireball.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.rss b/Modules/Parser/Tests/ParserTests/Resources/DaringFireball.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/DaringFireball.rss rename to Modules/Parser/Tests/ParserTests/Resources/DaringFireball.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/EMarley.rss b/Modules/Parser/Tests/ParserTests/Resources/EMarley.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/EMarley.rss rename to Modules/Parser/Tests/ParserTests/Resources/EMarley.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/KatieFloyd.rss b/Modules/Parser/Tests/ParserTests/Resources/KatieFloyd.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/KatieFloyd.rss rename to Modules/Parser/Tests/ParserTests/Resources/KatieFloyd.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/OneFootTsunami.atom b/Modules/Parser/Tests/ParserTests/Resources/OneFootTsunami.atom similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/OneFootTsunami.atom rename to Modules/Parser/Tests/ParserTests/Resources/OneFootTsunami.atom 
diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/ScriptingNews.json b/Modules/Parser/Tests/ParserTests/Resources/ScriptingNews.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/ScriptingNews.json rename to Modules/Parser/Tests/ParserTests/Resources/ScriptingNews.json diff --git a/Modules/Parser/Tests/OPMLParserTests/Resources/Subs.opml b/Modules/Parser/Tests/ParserTests/Resources/Subs.opml similarity index 100% rename from Modules/Parser/Tests/OPMLParserTests/Resources/Subs.opml rename to Modules/Parser/Tests/ParserTests/Resources/Subs.opml diff --git a/Modules/Parser/Tests/OPMLParserTests/Resources/SubsNoTitleAttributes.opml b/Modules/Parser/Tests/ParserTests/Resources/SubsNoTitleAttributes.opml similarity index 100% rename from Modules/Parser/Tests/OPMLParserTests/Resources/SubsNoTitleAttributes.opml rename to Modules/Parser/Tests/ParserTests/Resources/SubsNoTitleAttributes.opml diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/YouTubeTheVolvoRocks.html b/Modules/Parser/Tests/ParserTests/Resources/YouTubeTheVolvoRocks.html similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/YouTubeTheVolvoRocks.html rename to Modules/Parser/Tests/ParserTests/Resources/YouTubeTheVolvoRocks.html diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/aktuality.rss b/Modules/Parser/Tests/ParserTests/Resources/aktuality.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/aktuality.rss rename to Modules/Parser/Tests/ParserTests/Resources/aktuality.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/allthis-partial.json b/Modules/Parser/Tests/ParserTests/Resources/allthis-partial.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/allthis-partial.json rename to Modules/Parser/Tests/ParserTests/Resources/allthis-partial.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/allthis.atom 
b/Modules/Parser/Tests/ParserTests/Resources/allthis.atom similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/allthis.atom rename to Modules/Parser/Tests/ParserTests/Resources/allthis.atom diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/allthis.json b/Modules/Parser/Tests/ParserTests/Resources/allthis.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/allthis.json rename to Modules/Parser/Tests/ParserTests/Resources/allthis.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/atp.rss b/Modules/Parser/Tests/ParserTests/Resources/atp.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/atp.rss rename to Modules/Parser/Tests/ParserTests/Resources/atp.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/authors.json b/Modules/Parser/Tests/ParserTests/Resources/authors.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/authors.json rename to Modules/Parser/Tests/ParserTests/Resources/authors.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/bio.rdf b/Modules/Parser/Tests/ParserTests/Resources/bio.rdf similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/bio.rdf rename to Modules/Parser/Tests/ParserTests/Resources/bio.rdf diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/cloudblog.rss b/Modules/Parser/Tests/ParserTests/Resources/cloudblog.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/cloudblog.rss rename to Modules/Parser/Tests/ParserTests/Resources/cloudblog.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/coco.html b/Modules/Parser/Tests/ParserTests/Resources/coco.html similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/coco.html rename to Modules/Parser/Tests/ParserTests/Resources/coco.html diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/curt.json 
b/Modules/Parser/Tests/ParserTests/Resources/curt.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/curt.json rename to Modules/Parser/Tests/ParserTests/Resources/curt.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/dcrainmaker.xml b/Modules/Parser/Tests/ParserTests/Resources/dcrainmaker.xml similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/dcrainmaker.xml rename to Modules/Parser/Tests/ParserTests/Resources/dcrainmaker.xml diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/donthitsave.xml b/Modules/Parser/Tests/ParserTests/Resources/donthitsave.xml similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/donthitsave.xml rename to Modules/Parser/Tests/ParserTests/Resources/donthitsave.xml diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/expertopinionent.atom b/Modules/Parser/Tests/ParserTests/Resources/expertopinionent.atom similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/expertopinionent.atom rename to Modules/Parser/Tests/ParserTests/Resources/expertopinionent.atom diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/furbo.html b/Modules/Parser/Tests/ParserTests/Resources/furbo.html similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/furbo.html rename to Modules/Parser/Tests/ParserTests/Resources/furbo.html diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/inessential.html b/Modules/Parser/Tests/ParserTests/Resources/inessential.html similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/inessential.html rename to Modules/Parser/Tests/ParserTests/Resources/inessential.html diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/inessential.json b/Modules/Parser/Tests/ParserTests/Resources/inessential.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/inessential.json rename to 
Modules/Parser/Tests/ParserTests/Resources/inessential.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/kc0011.rss b/Modules/Parser/Tests/ParserTests/Resources/kc0011.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/kc0011.rss rename to Modules/Parser/Tests/ParserTests/Resources/kc0011.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/livemint.xml b/Modules/Parser/Tests/ParserTests/Resources/livemint.xml similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/livemint.xml rename to Modules/Parser/Tests/ParserTests/Resources/livemint.xml diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/macworld.rss b/Modules/Parser/Tests/ParserTests/Resources/macworld.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/macworld.rss rename to Modules/Parser/Tests/ParserTests/Resources/macworld.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/manton.rss b/Modules/Parser/Tests/ParserTests/Resources/manton.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/manton.rss rename to Modules/Parser/Tests/ParserTests/Resources/manton.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/monkeydom.rss b/Modules/Parser/Tests/ParserTests/Resources/monkeydom.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/monkeydom.rss rename to Modules/Parser/Tests/ParserTests/Resources/monkeydom.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/natasha.xml b/Modules/Parser/Tests/ParserTests/Resources/natasha.xml similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/natasha.xml rename to Modules/Parser/Tests/ParserTests/Resources/natasha.xml diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/phpxml.rss b/Modules/Parser/Tests/ParserTests/Resources/phpxml.rss similarity index 100% rename from 
Modules/Parser/Tests/FeedParserTests/Resources/phpxml.rss rename to Modules/Parser/Tests/ParserTests/Resources/phpxml.rss diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/pxlnv.json b/Modules/Parser/Tests/ParserTests/Resources/pxlnv.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/pxlnv.json rename to Modules/Parser/Tests/ParserTests/Resources/pxlnv.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/rose.json b/Modules/Parser/Tests/ParserTests/Resources/rose.json similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/rose.json rename to Modules/Parser/Tests/ParserTests/Resources/rose.json diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/russcox.atom b/Modules/Parser/Tests/ParserTests/Resources/russcox.atom similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/russcox.atom rename to Modules/Parser/Tests/ParserTests/Resources/russcox.atom diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/scriptingNews.rss b/Modules/Parser/Tests/ParserTests/Resources/scriptingNews.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/scriptingNews.rss rename to Modules/Parser/Tests/ParserTests/Resources/scriptingNews.rss diff --git a/Modules/Parser/Tests/HTMLParserTests/Resources/sixcolors.html b/Modules/Parser/Tests/ParserTests/Resources/sixcolors.html similarity index 100% rename from Modules/Parser/Tests/HTMLParserTests/Resources/sixcolors.html rename to Modules/Parser/Tests/ParserTests/Resources/sixcolors.html diff --git a/Modules/Parser/Tests/FeedParserTests/Resources/theomnishow.rss b/Modules/Parser/Tests/ParserTests/Resources/theomnishow.rss similarity index 100% rename from Modules/Parser/Tests/FeedParserTests/Resources/theomnishow.rss rename to Modules/Parser/Tests/ParserTests/Resources/theomnishow.rss From 3e6e843dc826e52fef00b5e32736077513e7bc35 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 22 Sep 
2024 21:40:52 -0700 Subject: [PATCH 72/88] Create first draft of HTMLMetadata. --- .../Sources/HTMLParser/HTMLMetadata.swift | 392 ++++++++++++++++++ .../Sources/{SAX => HTMLParser}/HTMLTag.swift | 6 +- .../Tests/ParserTests/HTMLLinkTests.swift | 1 - 3 files changed, 396 insertions(+), 3 deletions(-) create mode 100644 Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift rename Modules/Parser/Sources/{SAX => HTMLParser}/HTMLTag.swift (63%) diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift b/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift new file mode 100644 index 000000000..0e36b93d7 --- /dev/null +++ b/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift @@ -0,0 +1,392 @@ +// +// HTMLMetadata.swift +// +// +// Created by Brent Simmons on 9/22/24. +// + +import Foundation +import SAX + +public final class HTMLMetadata { + + public let baseURLString: String + public let tags: [HTMLTag] + public let favicons: [HTMLMetadataFavicon]? + public let appleTouchIcons: [HTMLMetadataAppleTouchIcon]? + public let feedLinks: [HTMLMetadataFeedLink]? + public let openGraphProperties: HTMLOpenGraphProperties? + public let twitterProperties: HTMLTwitterProperties? 
+ + init(_ urlString: String, _ tags: [HTMLTag]) { + + self.baseURLString = urlString + self.tags = tags + + self.favicons = Self.resolvedFaviconLinks(urlString, tags) + + if let appleTouchIconTags = Self.appleTouchIconTags(tags) { + self.appleTouchIcons = appleTouchIconTags.map { htmlTag in + HTMLMetadataAppleTouchIcon(urlString, htmlTag) + } + } + else { + self.appleTouchIcons = nil + } + + if let feedLinkTags = Self.feedLinkTags(tags) { + self.feedLinks = feedLinkTags.map { htmlTag in + HTMLMetadataFeedLink(urlString, htmlTag) + } + } + else { + self.feedLinks = nil + } + + self.openGraphProperties = HTMLOpenGraphProperties(urlString, tags) + self.twitterProperties = HTMLTwitterProperties(urlString, tags) + } + + static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? { + + let linkTags = linkTagsWithMatchingRel("icon") + var seenHrefs = [String]() + + let favicons = linkTags.compactMap { htmlTag in + + let favicon = HTMLMetadataFavicon(baseURLString, htmlTag) + guard let urlString = favicon.urlString else { + return nil + } + guard !seenHrefs.contains(urlString) else { + return nil + } + seenHrefs.append(urlString) + return favicon + } + + return favicons.isEmpty ? nil : favicons + } + + static func appleTouchIconTags(_ tags: [HTMLTag]) -> [HTMLTag]? { + + guard let linkTags = linkTags(tags) else { + return nil + } + + let appleTouchIconTags = tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], tags) + return appleTouchIconTags.isEmpty ? nil : appleTouchIconTags + } + + static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? 
{ + + let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else { + return nil + } + + let feedLinkTags = alternateLinkTags.filter { tag in + + guard let attributes = tag.attributes, let type = attributes.object(forCaseInsensitiveKey: "type"), typeIsFeedType(type) else { + return false + } + guard let urlString = urlString(from: attributes), !urlString.isEmpty else { + return false + } + + return true + } + + return feedLinkTags.isEmpty ? nil : feedLinkTags + } + + static func typeIsFeedType(_ type: String) -> Bool { + + let lowerType = type.lowercased() + return lowerType.hasSuffix("/rss+xml") || lowerType.hasSuffix("/atom+xml") || lowerType.hasSuffix("/json") + } + + static func linkTags(_ tags: [HTMLTag]) -> [HTMLTag]? { + + let linkTags = tags.filter { $0.tagType == .link } + return linkTags.isEmpty ? nil : linkTags + } + + static func linkTagsWithMatchingRel(_ valueToMatch: String, _ tags: [HTMLTag]) -> [HTMLTag]? { + + // Case-insensitive; matches a whitespace-delimited word + + guard let linkTags = linkTags(tags) else { + return nil + } + + let tagsWithURLString = linkTags.filter { tag in + guard let urlString = urlStringFromDictionary(tag.attributes), !urlString.isEmpty else { + return false + } + return true + } + if tagsWithURLString.isEmpty { + return nil + } + + let matchingTags = tagsMatchingRelValues([valueToMatch], tagsWithURLString) + return matchingTags.isEmpty ? nil : matchingTags + } + + static func tagsMatchingRelValues(_ valuesToMatch: [String], _ tags: [HTMLTag]) -> [HTMLTag]? 
{ + + let lowerValuesToMatch = valuesToMatch.map { $0.lowercased() } + + let matchingTags: [HTMLTag] = { + + tags.filter { tag in + + guard let relValue = relValue(tag.attributes) else { + return false + } + + let relValues = relValue.componentsSeparatedByCharactersInSet(.whitespacesAndNewlines) + for oneRelValue in relValues { + let oneLowerRelValue = oneRelValue.lowercased() + + for lowerValueToMatch in lowerValuesToMatch { + if lowerValueToMatch == oneLowerRelValue { + return true + } + } + } + + return false + } + } + + return matchingTags.isEmpty ? nil : matchingTags + } +} + +public final class HTMLMetadataAppleTouchIcon { + + public let rel: String? + public let sizes: String? + public let size: CGSize? + public let urlString: String? // Absolute + + init(_ urlString: String, _ tag: HTMLTag) { + + guard let attributes = tag.attributes else { + self.rel = nil + self.sizes = nil + self.size = nil + self.urlString = nil + return + } + + self.rel = attributes.object(forCaseInsensitiveKey: "rel") + self.urlString = absoluteURLStringWithDictionary(attributes) + + guard let sizes = attributes.object(forCaseInsensitiveKey: "sizes") else { + self.sizes = nil + self.size = nil + return + } + self.sizes = sizes + + let size: CGSize? = { + let sizeComponents = sizes.components(separatedBy: CharacterSet(charactersIn: "x")) + guard sizeComponents.count == 2 else { + return nil + } + let width = Double(sizeComponents[0]) + let height = Double(sizeComponents[1]) + return CGSize(width: width, height: height) + }() + + self.size = size + } +} + +public final class HTMLMetadataFeedLink { + + public let title: String? + public let type: String? + public let urlString: String? 
// Absolute + + init(_ urlString: String, _ tag: HTMLTag) { + + guard let attributes = tag.attributes else { + self.title = nil + self.type = nil + self.urlString = nil + return + } + + self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString) + self.title = attributes.object(forCaseInsensitiveKey: "title") + self.type = attributes.object(forCaseInsensitiveKey: "type") + } +} + +public final class HTMLMetadataFavicon { + + public let type: String? + public let urlString: String? + + init(_ urlString: String, _ tag: HTMLTag) { + + guard let attributes = tag.attributes else { + self.type = nil + self.urlString = nil + return + } + + self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString) + self.type = attributes.object(forCaseInsensitiveKey: "type") + } +} + +public final class HTMLOpenGraphProperties { + + // TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image. + // See http://ogp.me/ + + public let image: HTMLOpenGraphImage? + + init(_ urlString: String, _ tags: [HTMLTag]) { + + self.image = Self.parse(tags) + } +} + +private extension HTMLOpenGraphProperties { + + private static let ogPrefix = "og:" + + struct OGKey { + static let property = "property" + static let content = "content" + } + + struct OGValue { + static let ogImage = "og:image" + static let ogImageURL = "og:image:url" + static let ogImageSecureURL = "og:image:secure_url" + static let ogImageType = "og:image:type" + static let ogImageAlt = "og:image:alt" + static let ogImageWidth = "og:image:width" + static let ogImageHeight = "og:image:height" + } + + static func parse(_ tags: [HTMLTag]) -> [HTMLOpenGraphImage]? { + + let metaTags = tags.filter { $0.tagType == .meta } + if metaTags.isEmpty { + return nil + } + + // HTMLOpenGraphImage properties to fill in. + var url: String? + var secureURL: String? + var mimeType: String? + var width: CGFloat? + var height: CGFloat? + var altText: String? 
+ + for tag in metaTags { + + guard let attributes = tag.attributes else { + continue + } + guard let propertyName = attributes[OGKey.property], propertyName.hasPrefix(ogPrefix) else { + continue + } + guard let content = attributes[OGKey.content] else { + continue + } + + if propertyName == OGValue.ogImage { + url = content + } + else if propertyName == OGValue.ogImageURL { + url = content + } + else if propertyName == OGValue.ogImageSecureURL { + secureURL = content + } + else if propertyName == OGValue.ogImageType { + mimeType = content + } + else if propertyName == OGValue.ogImageAlt { + altText = content + } + else if propertyName == OGValue.ogImageWidth { + width = CGFloat(content) + } + else if propertyName == OGValue.ogImageHeight { + height = CGFloat(content) + } + } + + if url == nil && secureURL == nil && mimeType == nil && width == nil && height == nil && altText == nil { + return nil + } + + return HTMLOpenGraphImage(url: url, secureURL: secureURL, mimeType: mimeType, width: width, height: height, altText: altText) + } +} + +public final class HTMLOpenGraphImage { + + public let url : String? + public let secureURL: String? + public let mimeType: String? + public let width: CGFloat? + public let height: CGFloat? + public let altText: String? + + init(url: String?, secureURL: String?, mimeType: String, width: CGFloat?, height: CGFloat?, altText: String?) { + + self.url = url + self.secureURL = secureURL + self.mimeType = mimeType + self.width = width + self.height = height + self.altText = altText + } +} + +public final class HTMLTwitterProperties { + + public let imageURL: String? 
// twitter:image:src + + private struct TwitterKey { + static let name = "name" + static let content = "content" + } + + private struct TwitterValue { + static let imageSrc = "twitter:image:src" + } + + init(_ urlString: String, _ tags: [HTMLTag]) { + + let imageURL: String = { + for tag in tags { + guard tag.tagType == .meta else { + continue + } + guard let name = tag.attributes?[TwitterKey.name], name == TwitterValue.imageSrc else { + continue + } + guard let content = tag.attributes?[TwitterKey.content], !content.isEmpty else { + continue + } + return content + } + + return nil + }() + + self.imageURL = imageURL + } +} + diff --git a/Modules/Parser/Sources/SAX/HTMLTag.swift b/Modules/Parser/Sources/HTMLParser/HTMLTag.swift similarity index 63% rename from Modules/Parser/Sources/SAX/HTMLTag.swift rename to Modules/Parser/Sources/HTMLParser/HTMLTag.swift index 1333d9cff..e0bcfad5e 100644 --- a/Modules/Parser/Sources/SAX/HTMLTag.swift +++ b/Modules/Parser/Sources/HTMLParser/HTMLTag.swift @@ -7,6 +7,8 @@ import Foundation +public typealias HTMLTagAttributes = [String: String] + public struct HTMLTag: Sendable { public enum TagType: Sendable { @@ -15,9 +17,9 @@ public struct HTMLTag: Sendable { } public let tagType: TagType - public let attributes: [String: String]? + public let attributes: HTMLTagAttributes? - public init(tagType: TagType, attributes: [String : String]?) { + public init(tagType: TagType, attributes: HTMLTagAttributes?) 
{ self.tagType = tagType self.attributes = attributes } diff --git a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift index c179b8137..ac3c6f362 100644 --- a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift @@ -8,7 +8,6 @@ import XCTest import HTMLParser -import SAX import libxml2 class HTMLLinkTests: XCTestCase { From 117348617988288ad11bc08af576598df71e4961 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 22 Sep 2024 22:13:06 -0700 Subject: [PATCH 73/88] Fix errors in HTMLMetadata. --- .../Sources/HTMLParser/HTMLMetadata.swift | 104 +++++++++++++----- 1 file changed, 75 insertions(+), 29 deletions(-) diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift b/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift index 0e36b93d7..bf804d760 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift +++ b/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift @@ -49,10 +49,13 @@ public final class HTMLMetadata { static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? { - let linkTags = linkTagsWithMatchingRel("icon") + guard let linkTags = linkTagsWithMatchingRel("icon", tags) else { + return nil + } + var seenHrefs = [String]() - let favicons = linkTags.compactMap { htmlTag in + let favicons: [HTMLMetadataFavicon] = linkTags.compactMap { htmlTag in let favicon = HTMLMetadataFavicon(baseURLString, htmlTag) guard let urlString = favicon.urlString else { @@ -74,13 +77,15 @@ public final class HTMLMetadata { return nil } - let appleTouchIconTags = tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], tags) + guard let appleTouchIconTags = tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], linkTags) else { + return nil + } return appleTouchIconTags.isEmpty ? 
nil : appleTouchIconTags } static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? { - let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else { + guard let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else { return nil } @@ -120,7 +125,10 @@ public final class HTMLMetadata { } let tagsWithURLString = linkTags.filter { tag in - guard let urlString = urlStringFromDictionary(tag.attributes), !urlString.isEmpty else { + guard let attributes = tag.attributes else { + return false + } + guard let urlString = urlString(from: attributes), !urlString.isEmpty else { return false } return true @@ -129,7 +137,9 @@ public final class HTMLMetadata { return nil } - let matchingTags = tagsMatchingRelValues([valueToMatch], tagsWithURLString) + guard let matchingTags = tagsMatchingRelValues([valueToMatch], tagsWithURLString) else { + return nil + } return matchingTags.isEmpty ? nil : matchingTags } @@ -141,11 +151,14 @@ public final class HTMLMetadata { tags.filter { tag in - guard let relValue = relValue(tag.attributes) else { + guard let attributes = tag.attributes else { + return false + } + guard let relValue = relValue(from: attributes) else { return false } - let relValues = relValue.componentsSeparatedByCharactersInSet(.whitespacesAndNewlines) + let relValues = relValue.components(separatedBy: .whitespacesAndNewlines) for oneRelValue in relValues { let oneLowerRelValue = oneRelValue.lowercased() @@ -158,7 +171,7 @@ public final class HTMLMetadata { return false } - } + }() return matchingTags.isEmpty ? 
nil : matchingTags } @@ -182,7 +195,7 @@ public final class HTMLMetadataAppleTouchIcon { } self.rel = attributes.object(forCaseInsensitiveKey: "rel") - self.urlString = absoluteURLStringWithDictionary(attributes) + self.urlString = absoluteURLString(from: attributes, baseURL: urlString) guard let sizes = attributes.object(forCaseInsensitiveKey: "sizes") else { self.sizes = nil @@ -191,17 +204,13 @@ public final class HTMLMetadataAppleTouchIcon { } self.sizes = sizes - let size: CGSize? = { - let sizeComponents = sizes.components(separatedBy: CharacterSet(charactersIn: "x")) - guard sizeComponents.count == 2 else { - return nil - } - let width = Double(sizeComponents[0]) - let height = Double(sizeComponents[1]) - return CGSize(width: width, height: height) - }() - - self.size = size + let sizeComponents = sizes.components(separatedBy: CharacterSet(charactersIn: "x")) + if sizeComponents.count == 2, let width = Double(sizeComponents[0]), let height = Double(sizeComponents[1]) { + self.size = CGSize(width: width, height: height) + } + else { + self.size = nil + } } } @@ -220,7 +229,7 @@ public final class HTMLMetadataFeedLink { return } - self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString) + self.urlString = absoluteURLString(from: attributes, baseURL: urlString) self.title = attributes.object(forCaseInsensitiveKey: "title") self.type = attributes.object(forCaseInsensitiveKey: "type") } @@ -239,7 +248,7 @@ public final class HTMLMetadataFavicon { return } - self.urlString = absoluteURLStringWithDictionary(attributes, baseURLString) + self.urlString = absoluteURLString(from: attributes, baseURL: urlString) self.type = attributes.object(forCaseInsensitiveKey: "type") } } @@ -276,7 +285,7 @@ private extension HTMLOpenGraphProperties { static let ogImageHeight = "og:image:height" } - static func parse(_ tags: [HTMLTag]) -> [HTMLOpenGraphImage]? { + static func parse(_ tags: [HTMLTag]) -> HTMLOpenGraphImage? 
{ let metaTags = tags.filter { $0.tagType == .meta } if metaTags.isEmpty { @@ -319,10 +328,14 @@ private extension HTMLOpenGraphProperties { altText = content } else if propertyName == OGValue.ogImageWidth { - width = CGFloat(content) + if let value = Double(content) { + width = CGFloat(value) + } } else if propertyName == OGValue.ogImageHeight { - height = CGFloat(content) + if let value = Double(content) { + height = CGFloat(value) + } } } @@ -343,8 +356,8 @@ public final class HTMLOpenGraphImage { public let height: CGFloat? public let altText: String? - init(url: String?, secureURL: String?, mimeType: String, width: CGFloat?, height: CGFloat?, altText: String?) { - + init(url: String?, secureURL: String?, mimeType: String?, width: CGFloat?, height: CGFloat?, altText: String?) { + self.url = url self.secureURL = secureURL self.mimeType = mimeType @@ -369,7 +382,7 @@ public final class HTMLTwitterProperties { init(_ urlString: String, _ tags: [HTMLTag]) { - let imageURL: String = { + let imageURL: String? = { for tag in tags { guard tag.tagType == .meta else { continue @@ -390,3 +403,36 @@ public final class HTMLTwitterProperties { } } +private func urlString(from attributes: HTMLTagAttributes) -> String? { + + if let urlString = attributes.object(forCaseInsensitiveKey: "href") { + return urlString + } + return attributes.object(forCaseInsensitiveKey: "src") +} + +private func relValue(from attributes: HTMLTagAttributes) -> String? { + + attributes.object(forCaseInsensitiveKey: "rel") +} + +private func absoluteURLString(from attributes: HTMLTagAttributes, baseURL: String) -> String? { + + guard let urlString = urlString(from: attributes), !urlString.isEmpty else { + return nil + } + + return absoluteURLStringWithRelativeURLString(urlString, baseURLString: baseURL) +} + +private func absoluteURLStringWithRelativeURLString(_ relativeURLString: String, baseURLString: String) -> String? 
{ + + guard let baseURL = URL(string: baseURLString) else { + return nil + } + guard let absoluteURL = URL(string: relativeURLString, relativeTo: baseURL) else { + return nil + } + return absoluteURL.absoluteURL.standardized.absoluteString +} + From cbc2790121cf211ec572eb7e0a504cdb5b2f373e Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Sun, 22 Sep 2024 22:26:55 -0700 Subject: [PATCH 74/88] Start HTMLMetadataParser. --- .../HTMLParser/HTMLMetadataParser.swift | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift b/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift new file mode 100644 index 000000000..4ffa7d732 --- /dev/null +++ b/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift @@ -0,0 +1,41 @@ +// +// HTMLMetadataParser.swift +// +// +// Created by Brent Simmons on 9/22/24. +// + +import Foundation +import SAX + +public final class HTMLMetadataParser { + + private let parserData: ParserData + private var tags = [HTMLTag]() + private var htmlMetadata: HTMLMetadata? 
= nil + + public static func metadata(with parserData: ParserData) -> HTMLMetadata { + + let parser = HTMLMetadataParser(parserData) + parser.parse() + return parser.htmlMetadata + } + + init(_ parserData: ParserData) { + + self.parserData = parserData + } +} + +private extension HTMLMetadataParser { + + func parse() { + + self.tags = [HTMLTag]() + + let htmlParser = SAXHTMLParser(delegate: self, data: parserData.data) + htmlParser.parse() + + self.htmlMetadata = HTMLMetadata(parserData.url, tags) + } +} From a39130ff775983f8b35b1a74736cea8d3e19a6f3 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:11:23 -0700 Subject: [PATCH 75/88] =?UTF-8?q?Add=20StringDictionary=20typealias.=20It?= =?UTF-8?q?=E2=80=99s=20such=20a=20common=20type=20=E2=80=94=C2=A0it=20sho?= =?UTF-8?q?uld=20have=20a=20consistent=20name.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../FoundationExtras/Dictionary+Extensions.swift | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 Modules/FoundationExtras/Sources/FoundationExtras/Dictionary+Extensions.swift diff --git a/Modules/FoundationExtras/Sources/FoundationExtras/Dictionary+Extensions.swift b/Modules/FoundationExtras/Sources/FoundationExtras/Dictionary+Extensions.swift new file mode 100644 index 000000000..ec4363c0f --- /dev/null +++ b/Modules/FoundationExtras/Sources/FoundationExtras/Dictionary+Extensions.swift @@ -0,0 +1,10 @@ +// +// Dictionary+Extensions.swift +// +// +// Created by Brent Simmons on 9/23/24. +// + +import Foundation + +public typealias StringDictionary = [String: String] From 1f8b296a9ceec720b11970320a5e446841ad73fa Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:13:55 -0700 Subject: [PATCH 76/88] Use new StringDictionary name. 
--- Modules/Parser/Package.swift | 5 ++++- Modules/Parser/Sources/SAX/SAXHTMLParser.swift | 5 ++--- Modules/Parser/Sources/SAX/SAXParser.swift | 5 ++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index 390026a3f..d475aca85 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -46,6 +46,7 @@ let package = Package( .target( name: "HTMLParser", dependencies: [ + "FoundationExtras", "SAX" ], swiftSettings: [ @@ -63,7 +64,9 @@ let package = Package( ]), .target( name: "SAX", - dependencies: [], + dependencies: [ + "FoundationExtras" + ], swiftSettings: [ .enableExperimentalFeature("StrictConcurrency") ]), diff --git a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift index d37701b18..7d5c2ed77 100644 --- a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift +++ b/Modules/Parser/Sources/SAX/SAXHTMLParser.swift @@ -6,6 +6,7 @@ // import Foundation +import FoundationExtras import libxml2 public protocol SAXHTMLParserDelegate: AnyObject { @@ -94,9 +95,7 @@ public final class SAXHTMLParser { characters.count = 0 } - public typealias HTMLAttributesDictionary = [String: String] - - public func attributesDictionary(_ attributes: UnsafePointer?) -> HTMLAttributesDictionary? { + public func attributesDictionary(_ attributes: UnsafePointer?) -> StringDictionary? 
{ guard let attributes else { return nil diff --git a/Modules/Parser/Sources/SAX/SAXParser.swift b/Modules/Parser/Sources/SAX/SAXParser.swift index a9e93bd9f..ace5a04a6 100644 --- a/Modules/Parser/Sources/SAX/SAXParser.swift +++ b/Modules/Parser/Sources/SAX/SAXParser.swift @@ -6,6 +6,7 @@ // import Foundation +import FoundationExtras import libxml2 public typealias XMLPointer = UnsafePointer @@ -91,9 +92,7 @@ public final class SAXParser { characters.count = 0 } - public typealias XMLAttributesDictionary = [String: String] - - public func attributesDictionary(_ attributes: UnsafePointer?, attributeCount: Int) -> XMLAttributesDictionary? { + public func attributesDictionary(_ attributes: UnsafePointer?, attributeCount: Int) -> StringDictionary? { guard attributeCount > 0, let attributes else { return nil From 5eb2c524e60dcdbbced5964eafc0ece1232ee19d Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:37:54 -0700 Subject: [PATCH 77/88] Use StringDictionary typealias. Delete unused ParserTests.swift. --- .../Sources/FeedParser/Feeds/XML/AtomParser.swift | 9 +++++---- .../Sources/FeedParser/Feeds/XML/RSSParser.swift | 7 ++++--- Modules/Parser/Tests/ParserTests/ParserTests.swift | 12 ------------ 3 files changed, 9 insertions(+), 19 deletions(-) delete mode 100644 Modules/Parser/Tests/ParserTests/ParserTests.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 5d01f2479..4aaa77925 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -7,6 +7,7 @@ // import Foundation +import FoundationExtras import SAX import DateParser @@ -27,8 +28,8 @@ final class AtomParser { articles.last } - private var attributesStack = [SAXParser.XMLAttributesDictionary]() - private var currentAttributes: SAXParser.XMLAttributesDictionary? 
{ + private var attributesStack = [StringDictionary]() + private var currentAttributes: StringDictionary? { attributesStack.last } @@ -241,7 +242,7 @@ private extension AtomParser { } } - func enclosure(_ urlString: String, _ attributes: SAXParser.XMLAttributesDictionary) -> RSSEnclosure? { + func enclosure(_ urlString: String, _ attributes: StringDictionary) -> RSSEnclosure? { let enclosure = RSSEnclosure(url: urlString) enclosure.title = attributes[XMLString.title] @@ -293,7 +294,7 @@ extension AtomParser: SAXParserDelegate { return } - let xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) ?? SAXParser.XMLAttributesDictionary() + let xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) ?? StringDictionary() attributesStack.append(xmlAttributes) if parsingXHTML { diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift index e06007b59..4b06a6123 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -7,6 +7,7 @@ // import Foundation +import FoundationExtras import SAX import DateParser @@ -31,7 +32,7 @@ public final class RSSParser { private var parsingArticle = false private var parsingChannelImage = false private var parsingAuthor = false - private var currentAttributes: SAXParser.XMLAttributesDictionary? + private var currentAttributes: StringDictionary? 
static func parsedFeed(with parserData: ParserData) -> RSSFeed { @@ -265,7 +266,7 @@ private extension RSSParser { static let type = "type" } - func addEnclosure(_ attributes: SAXParser.XMLAttributesDictionary, _ currentArticle: RSSArticle) { + func addEnclosure(_ attributes: StringDictionary, _ currentArticle: RSSArticle) { guard let url = attributes[EnclosureKey.url], !url.isEmpty else { return @@ -304,7 +305,7 @@ extension RSSParser: SAXParserDelegate { return } - var xmlAttributes: SAXParser.XMLAttributesDictionary? = nil + var xmlAttributes: StringDictionary? = nil if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(localName, XMLName.enclosure) { xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) } diff --git a/Modules/Parser/Tests/ParserTests/ParserTests.swift b/Modules/Parser/Tests/ParserTests/ParserTests.swift deleted file mode 100644 index 814773283..000000000 --- a/Modules/Parser/Tests/ParserTests/ParserTests.swift +++ /dev/null @@ -1,12 +0,0 @@ -import XCTest -import FeedParser - -final class ParserTests: XCTestCase { - func testExample() throws { - // XCTest Documentation - // https://developer.apple.com/documentation/xctest - - // Defining Test Cases and Test Methods - // https://developer.apple.com/documentation/xctest/defining_test_cases_and_test_methods - } -} From 21848049f690b98b72f2ac9c1a9f78fc932cca83 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:38:07 -0700 Subject: [PATCH 78/88] Use StringDictionary typealias. 
--- Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift b/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift index 5a04612de..00994fd63 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift +++ b/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift @@ -1,11 +1,12 @@ // -// File.swift +// HTMLLinkParser.swift // // // Created by Brent Simmons on 9/21/24. // import Foundation +import FoundationExtras import SAX public final class HTMLLinkParser { @@ -49,12 +50,12 @@ extension HTMLLinkParser: SAXHTMLParserDelegate { static let title = "title" } - private func title(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? { + private func title(with attributesDictionary: StringDictionary) -> String? { attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.title) } - private func urlString(with attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) -> String? { + private func urlString(with attributesDictionary: StringDictionary) -> String? { guard let href = attributesDictionary.object(forCaseInsensitiveKey: HTMLAttributeName.href), !href.isEmpty else { return nil @@ -68,7 +69,7 @@ extension HTMLLinkParser: SAXHTMLParserDelegate { return absoluteURL.absoluteString } - private func handleLinkAttributes(_ attributesDictionary: SAXHTMLParser.HTMLAttributesDictionary) { + private func handleLinkAttributes(_ attributesDictionary: StringDictionary) { guard let currentLink else { assertionFailure("currentLink must not be nil") From 3bc02a78a3aee5cb801111285659872836037a12 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:38:23 -0700 Subject: [PATCH 79/88] Get HTMLMetadataParser working. 
--- .../HTMLParser/HTMLMetadataParser.swift | 88 ++++++++++++++++--- 1 file changed, 75 insertions(+), 13 deletions(-) diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift b/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift index 4ffa7d732..ab96da818 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift +++ b/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift @@ -6,36 +6,98 @@ // import Foundation +import FoundationExtras import SAX public final class HTMLMetadataParser { - private let parserData: ParserData private var tags = [HTMLTag]() - private var htmlMetadata: HTMLMetadata? = nil public static func metadata(with parserData: ParserData) -> HTMLMetadata { - let parser = HTMLMetadataParser(parserData) - parser.parse() - return parser.htmlMetadata - } - - init(_ parserData: ParserData) { - - self.parserData = parserData + HTMLMetadataParser().parse(parserData) } } private extension HTMLMetadataParser { - func parse() { + func parse(_ parserData: ParserData) -> HTMLMetadata { - self.tags = [HTMLTag]() + tags = [HTMLTag]() let htmlParser = SAXHTMLParser(delegate: self, data: parserData.data) htmlParser.parse() - self.htmlMetadata = HTMLMetadata(parserData.url, tags) + return HTMLMetadata(parserData.url, tags) + } +} + +extension HTMLMetadataParser: SAXHTMLParserDelegate { + + private struct HTMLName { + + static let link = "link".utf8CString + static let meta = "meta".utf8CString + } + + private struct HTMLKey { + + static let href = "href" + static let src = "src" + static let rel = "rel" + } + + private func link(with attributes: StringDictionary) -> String? 
{ + + if let link = attributes.object(forCaseInsensitiveKey: HTMLKey.href) { + return link + } + + return attributes.object(forCaseInsensitiveKey: HTMLKey.src) + } + + private func handleLinkAttributes(_ attributes: StringDictionary) { + + guard let rel = attributes.object(forCaseInsensitiveKey: HTMLKey.rel), !rel.isEmpty else { + return + } + guard let link = link(with: attributes), !link.isEmpty else { + return + } + + let tag = HTMLTag(tagType: .link, attributes: attributes) + tags.append(tag) + } + + private func handleMetaAttributes(_ attributes: StringDictionary) { + + let tag = HTMLTag(tagType: .meta, attributes: attributes) + tags.append(tag) + } + + public func saxHTMLParser(_ saxHTMLParser: SAXHTMLParser, startElement name: XMLPointer, attributes: UnsafePointer?) { + + if SAXEqualTags(name, HTMLName.link) { + let d = saxHTMLParser.attributesDictionary(attributes) + if let d, !d.isEmpty { + handleLinkAttributes(d) + } + } + else if SAXEqualTags(name, HTMLName.meta) { + let d = saxHTMLParser.attributesDictionary(attributes) + if let d, !d.isEmpty { + handleMetaAttributes(d) + } + } + } + + public func saxHTMLParser(_: SAXHTMLParser, endElement: XMLPointer) { + + // Nothing to do + } + + public func saxHTMLParser(_: SAXHTMLParser, charactersFound: XMLPointer, count: Int) { + + // Nothing to do } } From 986dbd679c313aca1e4df3361cfe781cb752ef6f Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:38:44 -0700 Subject: [PATCH 80/88] Uncomment and revise code in HTMLMetadataTests. 
--- .../Tests/ParserTests/HTMLMetadataTests.swift | 285 +++++++++--------- 1 file changed, 143 insertions(+), 142 deletions(-) diff --git a/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift index 12343f615..4133a9a8c 100644 --- a/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift @@ -7,146 +7,147 @@ // import XCTest +import HTMLParser -//class HTMLMetadataTests: XCTestCase { -// -// func testDaringFireball() { -// -// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") -// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) -// -// XCTAssertEqual(metadata.favicons.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005") -// -// XCTAssertEqual(metadata.feedLinks.count, 1) -// -// let feedLink = metadata.feedLinks.first! -// XCTAssertNil(feedLink.title) -// XCTAssertEqual(feedLink.type, "application/atom+xml") -// XCTAssertEqual(feedLink.urlString, "http://daringfireball.net/feeds/main") -// } -// -// func testDaringFireballPerformance() { -// -// // 0.002 sec on my 2012 iMac -// let d = parserData("DaringFireball", "html", "http://daringfireball.net/") -// self.measure { -// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) -// } -// } -// -// func testFurbo() { -// -// let d = parserData("furbo", "html", "http://furbo.org/") -// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) -// -// XCTAssertEqual(metadata.favicons.first?.urlString, "http://furbo.org/favicon.ico") -// -// XCTAssertEqual(metadata.feedLinks.count, 1) -// -// let feedLink = metadata.feedLinks.first! 
-// XCTAssertEqual(feedLink.title, "Iconfactory News Feed") -// XCTAssertEqual(feedLink.type, "application/rss+xml") -// } -// -// func testFurboPerformance() { -// -// // 0.001 sec on my 2012 iMac -// let d = parserData("furbo", "html", "http://furbo.org/") -// self.measure { -// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) -// } -// } -// -// func testInessential() { -// -// let d = parserData("inessential", "html", "http://inessential.com/") -// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) -// -// XCTAssertNil(metadata.favicons.first?.urlString) -// -// XCTAssertEqual(metadata.feedLinks.count, 1) -// let feedLink = metadata.feedLinks.first! -// XCTAssertEqual(feedLink.title, "RSS") -// XCTAssertEqual(feedLink.type, "application/rss+xml") -// XCTAssertEqual(feedLink.urlString, "http://inessential.com/xml/rss.xml") -// -// XCTAssertEqual(metadata.appleTouchIcons.count, 0); -// } -// -// func testInessentialPerformance() { -// -// // 0.001 sec on my 2012 iMac -// let d = parserData("inessential", "html", "http://inessential.com/") -// self.measure { -// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) -// } -// } -// -// func testCocoPerformance() { -// -// // 0.004 sec on my 2012 iMac -// let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") -// self.measure { -// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) -// } -// } -// -// func testSixColors() { -// -// let d = parserData("sixcolors", "html", "http://sixcolors.com/") -// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) -// -// XCTAssertEqual(metadata.favicons.first?.urlString, "https://sixcolors.com/images/favicon.ico") -// -// XCTAssertEqual(metadata.feedLinks.count, 1); -// let feedLink = metadata.feedLinks.first! 
-// XCTAssertEqual(feedLink.title, "RSS"); -// XCTAssertEqual(feedLink.type, "application/rss+xml"); -// XCTAssertEqual(feedLink.urlString, "http://feedpress.me/sixcolors"); -// -// XCTAssertEqual(metadata.appleTouchIcons.count, 6); -// let icon = metadata.appleTouchIcons[3]; -// XCTAssertEqual(icon.rel, "apple-touch-icon"); -// XCTAssertEqual(icon.sizes, "120x120"); -// XCTAssertEqual(icon.urlString, "https://sixcolors.com/apple-touch-icon-120.png"); -// } -// -// func testSixColorsPerformance() { -// -// // 0.002 sec on my 2012 iMac -// let d = parserData("sixcolors", "html", "http://sixcolors.com/") -// self.measure { -// let _ = RSHTMLMetadataParser.htmlMetadata(with: d) -// } -// } -// -// func testCocoOGImage() { -// -// let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") -// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) -// let openGraphData = metadata.openGraphProperties -// let image = openGraphData.images.first! -// XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") -// } -// -// func testCocoTwitterImage() { -// -// let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") -// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) -// let twitterData = metadata.twitterProperties -// let imageURL = twitterData.imageURL! -// XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") -// } -// -// func testYouTube() { -// // YouTube is a special case — the feed links appear after the head section, in the body section. 
-// let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks") -// let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) -// -// XCTAssertEqual(metadata.feedLinks.count, 1); -// let feedLink = metadata.feedLinks.first! -// XCTAssertEqual(feedLink.title, "RSS"); -// XCTAssertEqual(feedLink.type, "application/rss+xml"); -// XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ"); -// } -//} +final class HTMLMetadataTests: XCTestCase { + + func testDaringFireball() { + + let d = parserData("DaringFireball", "html", "http://daringfireball.net/") + let metadata = HTMLMetadataParser.metadata(with: d) + + XCTAssertEqual(metadata.favicons?.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005") + + XCTAssertEqual(metadata.feedLinks?.count, 1) + + let feedLink: HTMLMetadataFeedLink = (metadata.feedLinks?.first!)! + XCTAssertNil(feedLink.title) + XCTAssertEqual(feedLink.type, "application/atom+xml") + XCTAssertEqual(feedLink.urlString, "http://daringfireball.net/feeds/main") + } + + func testDaringFireballPerformance() { + + // 0.002 sec on my 2012 iMac + let d = parserData("DaringFireball", "html", "http://daringfireball.net/") + self.measure { + let _ = HTMLMetadataParser.metadata(with: d) + } + } + + func testFurbo() { + + let d = parserData("furbo", "html", "http://furbo.org/") + let metadata = HTMLMetadataParser.metadata(with: d) + + XCTAssertEqual(metadata.favicons?.first?.urlString, "http://furbo.org/favicon.ico") + + XCTAssertEqual(metadata.feedLinks?.count, 1) + + let feedLink = (metadata.feedLinks?.first!)! 
+ XCTAssertEqual(feedLink.title, "Iconfactory News Feed") + XCTAssertEqual(feedLink.type, "application/rss+xml") + } + + func testFurboPerformance() { + + // 0.001 sec on my 2012 iMac + let d = parserData("furbo", "html", "http://furbo.org/") + self.measure { + let _ = HTMLMetadataParser.metadata(with: d) + } + } + + func testInessential() { + + let d = parserData("inessential", "html", "http://inessential.com/") + let metadata = HTMLMetadataParser.metadata(with: d) + + XCTAssertNil(metadata.favicons?.first?.urlString) + + XCTAssertEqual(metadata.feedLinks?.count, 1) + let feedLink = (metadata.feedLinks?.first!)! + XCTAssertEqual(feedLink.title, "RSS") + XCTAssertEqual(feedLink.type, "application/rss+xml") + XCTAssertEqual(feedLink.urlString, "http://inessential.com/xml/rss.xml") + + XCTAssertEqual(metadata.appleTouchIcons?.count ?? 0, 0); + } + + func testInessentialPerformance() { + + // 0.001 sec on my 2012 iMac + let d = parserData("inessential", "html", "http://inessential.com/") + self.measure { + let _ = HTMLMetadataParser.metadata(with: d) + } + } + + func testCocoPerformance() { + + // 0.004 sec on my 2012 iMac + let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") + self.measure { + let _ = HTMLMetadataParser.metadata(with: d) + } + } + + func testSixColors() { + + let d = parserData("sixcolors", "html", "http://sixcolors.com/") + let metadata = HTMLMetadataParser.metadata(with: d) + + XCTAssertEqual(metadata.favicons?.first?.urlString, "https://sixcolors.com/images/favicon.ico") + + XCTAssertEqual(metadata.feedLinks?.count, 1); + let feedLink = (metadata.feedLinks?.first!)! 
+ XCTAssertEqual(feedLink.title, "RSS"); + XCTAssertEqual(feedLink.type, "application/rss+xml"); + XCTAssertEqual(feedLink.urlString, "http://feedpress.me/sixcolors"); + + XCTAssertEqual(metadata.appleTouchIcons!.count, 6); + let icon = metadata.appleTouchIcons![3]; + XCTAssertEqual(icon.rel, "apple-touch-icon"); + XCTAssertEqual(icon.sizes, "120x120"); + XCTAssertEqual(icon.urlString, "https://sixcolors.com/apple-touch-icon-120.png"); + } + + func testSixColorsPerformance() { + + // 0.002 sec on my 2012 iMac + let d = parserData("sixcolors", "html", "http://sixcolors.com/") + self.measure { + let _ = HTMLMetadataParser.metadata(with: d) + } + } + + func testCocoOGImage() { + + let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") + let metadata = HTMLMetadataParser.metadata(with: d) + let openGraphData = metadata.openGraphProperties! + let image = openGraphData.image! + XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") + } + + func testCocoTwitterImage() { + + let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") + let metadata = HTMLMetadataParser.metadata(with: d) + let twitterData = metadata.twitterProperties! + let imageURL = twitterData.imageURL! + XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") + } + + func testYouTube() { + // YouTube is a special case — the feed links appear after the head section, in the body section. 
+ let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks") + let metadata = HTMLMetadataParser.metadata(with: d) + + XCTAssertEqual(metadata.feedLinks!.count, 1); + let feedLink = metadata.feedLinks!.first! + XCTAssertEqual(feedLink.title, "RSS"); + XCTAssertEqual(feedLink.type, "application/rss+xml"); + XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ"); + } +} From 2fa07611bfd065ac81ae255e9cd24312da2b2090 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:39:00 -0700 Subject: [PATCH 81/88] Mark test classes as final, because they are. --- Modules/Parser/Tests/ParserTests/AtomParserTests.swift | 2 +- Modules/Parser/Tests/ParserTests/DateParserTests.swift | 2 +- Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift | 2 +- Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift | 2 +- Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift | 2 +- Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift | 2 +- Modules/Parser/Tests/ParserTests/OPMLTests.swift | 2 +- Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift | 2 +- Modules/Parser/Tests/ParserTests/RSSParserTests.swift | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Modules/Parser/Tests/ParserTests/AtomParserTests.swift b/Modules/Parser/Tests/ParserTests/AtomParserTests.swift index 3a83ba994..eb2f738ed 100644 --- a/Modules/Parser/Tests/ParserTests/AtomParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/AtomParserTests.swift @@ -9,7 +9,7 @@ import XCTest import FeedParser -class AtomParserTests: XCTestCase { +final class AtomParserTests: XCTestCase { func testDaringFireballPerformance() { diff --git a/Modules/Parser/Tests/ParserTests/DateParserTests.swift b/Modules/Parser/Tests/ParserTests/DateParserTests.swift index 948e5aacb..3861e15f4 100644 --- a/Modules/Parser/Tests/ParserTests/DateParserTests.swift +++ 
b/Modules/Parser/Tests/ParserTests/DateParserTests.swift @@ -9,7 +9,7 @@ import Foundation import XCTest @testable import DateParser -class DateParserTests: XCTestCase { +final class DateParserTests: XCTestCase { func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ millisecond: Int = 0) -> Date { var dateComponents = DateComponents() diff --git a/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift index 110e698d9..292b6b183 100644 --- a/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift +++ b/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift @@ -9,7 +9,7 @@ import XCTest import SAX -class EntityDecodingTests: XCTestCase { +final class EntityDecodingTests: XCTestCase { func test39Decoding() { diff --git a/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift index 8c76c3f89..8600936e2 100644 --- a/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift @@ -10,7 +10,7 @@ import XCTest @testable import FeedParser import SAX -class FeedParserTypeTests: XCTestCase { +final class FeedParserTypeTests: XCTestCase { // MARK: HTML diff --git a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift index ac3c6f362..cdc8834d6 100644 --- a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift @@ -10,7 +10,7 @@ import XCTest import HTMLParser import libxml2 -class HTMLLinkTests: XCTestCase { +final class HTMLLinkTests: XCTestCase { func testSixColorsPerformance() { diff --git a/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift b/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift index a315481af..3e605ed16 100644 --- a/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift +++ 
b/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift @@ -9,7 +9,7 @@ import XCTest import FeedParser -class JSONFeedParserTests: XCTestCase { +final class JSONFeedParserTests: XCTestCase { func testInessentialPerformance() { diff --git a/Modules/Parser/Tests/ParserTests/OPMLTests.swift b/Modules/Parser/Tests/ParserTests/OPMLTests.swift index b8c967dd3..4e8400e56 100644 --- a/Modules/Parser/Tests/ParserTests/OPMLTests.swift +++ b/Modules/Parser/Tests/ParserTests/OPMLTests.swift @@ -10,7 +10,7 @@ import XCTest import SAX @testable import OPMLParser -class OPMLTests: XCTestCase { +final class OPMLTests: XCTestCase { let subsData = parserData("Subs", "opml", "http://example.org/") diff --git a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift index 4412dd062..21c345935 100644 --- a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift @@ -9,7 +9,7 @@ import XCTest import FeedParser -class RSSInJSONParserTests: XCTestCase { +final class RSSInJSONParserTests: XCTestCase { func testScriptingNewsPerformance() { diff --git a/Modules/Parser/Tests/ParserTests/RSSParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSParserTests.swift index 4dea40bee..25f71fad3 100644 --- a/Modules/Parser/Tests/ParserTests/RSSParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/RSSParserTests.swift @@ -9,7 +9,7 @@ import XCTest import FeedParser -class RSSParserTests: XCTestCase { +final class RSSParserTests: XCTestCase { func testScriptingNewsPerformance() { From e752363a60acdbc8a4b28764b7b182710394740c Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 21:42:40 -0700 Subject: [PATCH 82/88] Remove ParserObjC and references to it. 
--- Modules/Account/Package.swift | 2 - Modules/Account/Sources/Account/Account.swift | 1 - .../CloudKitAccountDelegate.swift | 1 - .../LocalAccountDelegate.swift | 1 - .../Account/Sources/Account/OPMLFile.swift | 1 - .../Sources/Account/OPMLNormalizer.swift | 1 - .../CloudKitSync/CloudKitAccountZone.swift | 1 - .../CloudKitSync/CloudKitArticlesZone.swift | 1 - .../Sources/FeedFinder/FeedFinder.swift | 1 - .../Sources/FeedFinder/HTMLFeedFinder.swift | 1 - .../Sources/Feedbin/FeedbinEntry.swift | 1 - .../Images/Favicons/FaviconDownloader.swift | 1 - .../Images/Favicons/FaviconURLFinder.swift | 1 - .../Sources/Images/FeedIconDownloader.swift | 1 - .../Images/RSHTMLMetadata+Extension.swift | 1 - .../LocalAccount/InitialFeedDownloader.swift | 1 - Modules/ParserObjC/.gitignore | 8 - Modules/ParserObjC/Package.swift | 26 - .../Sources/ParserObjC/NSData+RSParser.h | 26 - .../Sources/ParserObjC/NSData+RSParser.m | 139 ---- .../Sources/ParserObjC/NSString+RSParser.h | 26 - .../Sources/ParserObjC/NSString+RSParser.m | 348 --------- .../Sources/ParserObjC/RSAtomParser.h | 18 - .../Sources/ParserObjC/RSAtomParser.m | 679 ------------------ .../Sources/ParserObjC/RSDateParser.h | 22 - .../Sources/ParserObjC/RSDateParser.m | 461 ------------ .../Sources/ParserObjC/RSHTMLLinkParser.h | 35 - .../Sources/ParserObjC/RSHTMLLinkParser.m | 154 ---- .../Sources/ParserObjC/RSHTMLMetadata.h | 98 --- .../Sources/ParserObjC/RSHTMLMetadata.m | 483 ------------- .../Sources/ParserObjC/RSHTMLMetadataParser.h | 24 - .../Sources/ParserObjC/RSHTMLMetadataParser.m | 151 ---- .../Sources/ParserObjC/RSParsedArticle.h | 37 - .../Sources/ParserObjC/RSParsedArticle.m | 134 ---- .../Sources/ParserObjC/RSParsedFeed.h | 23 - .../Sources/ParserObjC/RSParsedFeed.m | 32 - .../Sources/ParserObjC/RSParserInternal.h | 24 - .../Sources/ParserObjC/RSParserInternal.m | 61 -- .../Sources/ParserObjC/RSRSSParser.h | 19 - .../Sources/ParserObjC/RSRSSParser.m | 523 -------------- 
.../Sources/ParserObjC/RSSAXHTMLParser.h | 55 -- .../Sources/ParserObjC/RSSAXHTMLParser.m | 321 --------- .../Sources/ParserObjC/include/RSParser.h | 56 -- NetNewsWire.xcodeproj/project.pbxproj | 27 - Shared/AppDelegate+Shared.swift | 1 - .../HTMLMetadata/HTMLMetadataDownloader.swift | 1 - 46 files changed, 4029 deletions(-) delete mode 100644 Modules/ParserObjC/.gitignore delete mode 100644 Modules/ParserObjC/Package.swift delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h 
delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h delete mode 100755 Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m delete mode 100644 Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h diff --git a/Modules/Account/Package.swift b/Modules/Account/Package.swift index 48ee3e33e..539cb3ea5 100644 --- a/Modules/Account/Package.swift +++ b/Modules/Account/Package.swift @@ -12,7 +12,6 @@ let package = Package( ], dependencies: [ .package(path: "../Parser"), - .package(path: "../ParserObjC"), .package(path: "../Articles"), .package(path: "../ArticlesDatabase"), .package(path: "../Web"), @@ -35,7 +34,6 @@ let package = Package( name: "Account", dependencies: [ "Parser", - "ParserObjC", "Web", "Articles", "ArticlesDatabase", diff --git a/Modules/Account/Sources/Account/Account.swift b/Modules/Account/Sources/Account/Account.swift index 278fe5cfe..efc42748b 100644 --- a/Modules/Account/Sources/Account/Account.swift +++ b/Modules/Account/Sources/Account/Account.swift @@ -13,7 +13,6 @@ import UIKit import Foundation import Articles import Parser -import ParserObjC import Database import ArticlesDatabase import Web diff --git a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift index 3e83b9da9..544a30e93 100644 --- a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift @@ -12,7 +12,6 @@ import SystemConfiguration import os.log import SyncDatabase import Parser -import ParserObjC import Articles import ArticlesDatabase import Web diff --git a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift index 01ec34b3c..00a9b05f0 100644 --- 
a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift @@ -9,7 +9,6 @@ import Foundation import os.log import Parser -import ParserObjC import Articles import ArticlesDatabase import Web diff --git a/Modules/Account/Sources/Account/OPMLFile.swift b/Modules/Account/Sources/Account/OPMLFile.swift index c44b15d69..a8fe0593f 100644 --- a/Modules/Account/Sources/Account/OPMLFile.swift +++ b/Modules/Account/Sources/Account/OPMLFile.swift @@ -9,7 +9,6 @@ import Foundation import os import Parser -import ParserObjC import Core @MainActor final class OPMLFile { diff --git a/Modules/Account/Sources/Account/OPMLNormalizer.swift b/Modules/Account/Sources/Account/OPMLNormalizer.swift index bfad43687..ab6111f89 100644 --- a/Modules/Account/Sources/Account/OPMLNormalizer.swift +++ b/Modules/Account/Sources/Account/OPMLNormalizer.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC final class OPMLNormalizer { diff --git a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift index 8f3b566a5..e4a18cc51 100644 --- a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift +++ b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift @@ -10,7 +10,6 @@ import Foundation import os.log import Web import Parser -import ParserObjC import CloudKit import FoundationExtras diff --git a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift index e0c98cccd..c5aba8fa4 100644 --- a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift +++ b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift @@ -9,7 +9,6 @@ import Foundation import os.log import Parser -import ParserObjC import Web import CloudKit import Articles diff --git 
a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift index 738f7b465..d2dd7cfc1 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC import Web import CommonErrors import os.log diff --git a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift index f755dbc20..805bc047b 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift @@ -9,7 +9,6 @@ import Foundation import FoundationExtras import Parser -import ParserObjC private let feedURLWordsToMatch = ["feed", "xml", "rss", "atom", "json"] diff --git a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift index 809ea799c..03e90215a 100644 --- a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift +++ b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC public final class FeedbinEntry: Decodable, @unchecked Sendable { diff --git a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift index 31670d7df..4055b7b73 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift @@ -12,7 +12,6 @@ import Articles import Account import UniformTypeIdentifiers import Core -import ParserObjC public extension Notification.Name { static let FaviconDidBecomeAvailable = Notification.Name("FaviconDidBecomeAvailableNotification") // userInfo key: FaviconDownloader.UserInfoKey.faviconURL diff --git a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift 
index b3e99fa13..f959ff3b1 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift @@ -9,7 +9,6 @@ import Foundation import CoreServices import Parser -import ParserObjC import UniformTypeIdentifiers // The favicon URLs may be specified in the head section of the home page. diff --git a/Modules/Images/Sources/Images/FeedIconDownloader.swift b/Modules/Images/Sources/Images/FeedIconDownloader.swift index 35066ff42..55a66363c 100644 --- a/Modules/Images/Sources/Images/FeedIconDownloader.swift +++ b/Modules/Images/Sources/Images/FeedIconDownloader.swift @@ -11,7 +11,6 @@ import Articles import Account import Web import Parser -import ParserObjC import Core public extension Notification.Name { diff --git a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift b/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift index d421d7f33..fd5da45d7 100644 --- a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift +++ b/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC extension RSHTMLMetadata { diff --git a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift index e3a408108..142833b02 100644 --- a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift +++ b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC import Web public struct InitialFeedDownloader { diff --git a/Modules/ParserObjC/.gitignore b/Modules/ParserObjC/.gitignore deleted file mode 100644 index 0023a5340..000000000 --- a/Modules/ParserObjC/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.DS_Store -/.build -/Packages -xcuserdata/ -DerivedData/ -.swiftpm/configuration/registries.json -.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata 
-.netrc diff --git a/Modules/ParserObjC/Package.swift b/Modules/ParserObjC/Package.swift deleted file mode 100644 index 69281ca1d..000000000 --- a/Modules/ParserObjC/Package.swift +++ /dev/null @@ -1,26 +0,0 @@ -// swift-tools-version: 5.10 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "ParserObjC", - platforms: [.macOS(.v14), .iOS(.v17)], - products: [ - // Products define the executables and libraries a package produces, making them visible to other packages. - .library( - name: "ParserObjC", - type: .dynamic, - targets: ["ParserObjC"]), - ], - targets: [ - // Targets are the basic building blocks of a package, defining a module or a test suite. - // Targets can depend on other targets in this package and products from dependencies. - .target( - name: "ParserObjC", - cSettings: [ - .headerSearchPath("include") - ] - ), - ] -) diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h b/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h deleted file mode 100644 index be2d892f8..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h +++ /dev/null @@ -1,26 +0,0 @@ -// -// NSData+RSParser.h -// RSParser -// -// Created by Brent Simmons on 6/24/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@interface NSData (RSParser) - -- (BOOL)isProbablyHTML; -- (BOOL)isProbablyXML; -- (BOOL)isProbablyJSON; - -- (BOOL)isProbablyJSONFeed; -- (BOOL)isProbablyRSSInJSON; -- (BOOL)isProbablyRSS; -- (BOOL)isProbablyAtom; - -@end - - - diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m b/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m deleted file mode 100644 index 8ac9aa167..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m +++ /dev/null @@ -1,139 +0,0 @@ -// -// NSData+RSParser.m -// RSParser -// -// Created by Brent Simmons on 6/24/17. 
-// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -#import "NSData+RSParser.h" - - - - -/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/ - -static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes); -static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes); - -@implementation NSData (RSParser) - -- (BOOL)isProbablyHTML { - - return bytesAreProbablyHTML(self.bytes, self.length); -} - -- (BOOL)isProbablyXML { - - return bytesAreProbablyXML(self.bytes, self.length); -} - -- (BOOL)isProbablyJSON { - - return bytesStartWithStringIgnoringWhitespace("{", self.bytes, self.length); -} - -- (BOOL)isProbablyJSONFeed { - - if (![self isProbablyJSON]) { - return NO; - } - return didFindString("://jsonfeed.org/version/", self.bytes, self.length) || didFindString(":\\/\\/jsonfeed.org\\/version\\/", self.bytes, self.length); -} - -- (BOOL)isProbablyRSSInJSON { - - if (![self isProbablyJSON]) { - return NO; - } - const char *bytes = self.bytes; - NSUInteger length = self.length; - return didFindString("rss", bytes, length) && didFindString("channel", bytes, length) && didFindString("item", bytes, length); -} - -- (BOOL)isProbablyRSS { - - if (didFindString(" tag, but it should be parsed anyway. It does have some other distinct RSS markers we can find. 
- return (didFindString("", self.bytes, self.length) && didFindString("", self.bytes, self.length)); -} - -- (BOOL)isProbablyAtom { - - return didFindString(", and & entity-encoded. -@property (readonly, copy) NSString *rsparser_stringByEncodingRequiredEntities; - -- (NSString *)rsparser_md5Hash; - -- (BOOL)rsparser_contains:(NSString *)s; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m b/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m deleted file mode 100755 index 8a4e7d114..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m +++ /dev/null @@ -1,348 +0,0 @@ -// -// NSString+RSParser.m -// RSParser -// -// Created by Brent Simmons on 9/25/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -#import "NSString+RSParser.h" -#import - - - - -@interface NSScanner (RSParser) - -- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity; - -@end - - -@implementation NSString (RSParser) - -- (BOOL)rsparser_contains:(NSString *)s { - - return [self rangeOfString:s].location != NSNotFound; -} - -- (NSString *)rsparser_stringByDecodingHTMLEntities { - - @autoreleasepool { - - NSScanner *scanner = [[NSScanner alloc] initWithString:self]; - scanner.charactersToBeSkipped = nil; - NSMutableString *result = [[NSMutableString alloc] init]; - - while (true) { - - NSString *scannedString = nil; - if ([scanner scanUpToString:@"&" intoString:&scannedString]) { - [result appendString:scannedString]; - } - if (scanner.isAtEnd) { - break; - } - NSUInteger savedScanLocation = scanner.scanLocation; - - NSString *decodedEntity = nil; - if ([scanner rs_scanEntityValue:&decodedEntity]) { - [result appendString:decodedEntity]; - } - else { - [result appendString:@"&"]; - scanner.scanLocation = savedScanLocation + 1; - } - - if (scanner.isAtEnd) { - break; - } - } - - if ([self isEqualToString:result]) { - return self; - } - return [result copy]; - } -} - - -static 
NSDictionary *RSEntitiesDictionary(void); -static NSString *RSParserStringWithValue(uint32_t value); - -- (NSString * _Nullable)rs_stringByDecodingEntity { - - // self may or may not have outer & and ; characters. - - NSMutableString *s = [self mutableCopy]; - - if ([s hasPrefix:@"&"]) { - [s deleteCharactersInRange:NSMakeRange(0, 1)]; - } - if ([s hasSuffix:@";"]) { - [s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)]; - } - - NSDictionary *entitiesDictionary = RSEntitiesDictionary(); - - NSString *decodedEntity = entitiesDictionary[self]; - if (decodedEntity) { - return decodedEntity; - } - - if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex - NSScanner *scanner = [[NSScanner alloc] initWithString:s]; - scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"]; - unsigned int hexValue = 0; - if ([scanner scanHexInt:&hexValue]) { - return RSParserStringWithValue((uint32_t)hexValue); - } - return nil; - } - - else if ([s hasPrefix:@"#"]) { - [s deleteCharactersInRange:NSMakeRange(0, 1)]; - NSInteger value = s.integerValue; - if (value < 1) { - return nil; - } - return RSParserStringWithValue((uint32_t)value); - } - - return nil; -} - -- (NSString *)rsparser_stringByEncodingRequiredEntities { - NSMutableString *result = [NSMutableString string]; - - for (NSUInteger i = 0; i < self.length; ++i) { - unichar c = [self characterAtIndex:i]; - - switch (c) { - case '<': - [result appendString:@"<"]; - break; - case '>': - [result appendString:@">"]; - break; - case '&': - [result appendString:@"&"]; - break; - default: - [result appendFormat:@"%C", c]; - break; - } - } - - return [result copy]; -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -- (NSData *)_rsparser_md5HashData { - - NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding]; - unsigned char hash[CC_MD5_DIGEST_LENGTH]; - CC_MD5(data.bytes, (CC_LONG)data.length, hash); - - return [NSData dataWithBytes:(const void 
*)hash length:CC_MD5_DIGEST_LENGTH]; -} -#pragma GCC diagnostic pop - -- (NSString *)rsparser_md5Hash { - - NSData *md5Data = [self _rsparser_md5HashData]; - const Byte *bytes = md5Data.bytes; - return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]]; -} - - -@end - -@implementation NSScanner (RSParser) - -- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { - - NSString *s = self.string; - NSUInteger initialScanLocation = self.scanLocation; - static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. - - while (true) { - - unichar ch = [s characterAtIndex:self.scanLocation]; - if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { - break; - } - if (ch == ';') { - if (!decodedEntity) { - return YES; - } - NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)]; - *decodedEntity = [rawEntity rs_stringByDecodingEntity]; - self.scanLocation = self.scanLocation + 1; - return *decodedEntity != nil; - } - - self.scanLocation = self.scanLocation + 1; - if (self.scanLocation - initialScanLocation > maxEntityLength) { - break; - } - if (self.isAtEnd) { - break; - } - } - - return NO; -} - -@end - -static NSString *RSParserStringWithValue(uint32_t value) { - // From WebCore's HTMLEntityParser - static const uint32_t windowsLatin1ExtensionArray[32] = { - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F - }; - - if ((value & ~0x1Fu) == 0x80u) { // value >= 128 && value < 
160 - value = windowsLatin1ExtensionArray[value - 0x80]; - } - - value = CFSwapInt32HostToLittle(value); - - return [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding]; -} - -static NSDictionary *RSEntitiesDictionary(void) { - - static NSDictionary *entitiesDictionary = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - - entitiesDictionary = @{ - // Named entities - @"AElig": @"Æ", - @"Aacute": @"Á", - @"Acirc": @"Â", - @"Agrave": @"À", - @"Aring": @"Å", - @"Atilde": @"Ã", - @"Auml": @"Ä", - @"Ccedil": @"Ç", - @"Dstrok": @"Ð", - @"ETH": @"Ð", - @"Eacute": @"É", - @"Ecirc": @"Ê", - @"Egrave": @"È", - @"Euml": @"Ë", - @"Iacute": @"Í", - @"Icirc": @"Î", - @"Igrave": @"Ì", - @"Iuml": @"Ï", - @"Ntilde": @"Ñ", - @"Oacute": @"Ó", - @"Ocirc": @"Ô", - @"Ograve": @"Ò", - @"Oslash": @"Ø", - @"Otilde": @"Õ", - @"Ouml": @"Ö", - @"Pi": @"Π", - @"THORN": @"Þ", - @"Uacute": @"Ú", - @"Ucirc": @"Û", - @"Ugrave": @"Ù", - @"Uuml": @"Ü", - @"Yacute": @"Y", - @"aacute": @"á", - @"acirc": @"â", - @"acute": @"´", - @"aelig": @"æ", - @"agrave": @"à", - @"amp": @"&", - @"apos": @"'", - @"aring": @"å", - @"atilde": @"ã", - @"auml": @"ä", - @"brkbar": @"¦", - @"brvbar": @"¦", - @"ccedil": @"ç", - @"cedil": @"¸", - @"cent": @"¢", - @"copy": @"©", - @"curren": @"¤", - @"deg": @"°", - @"die": @"¨", - @"divide": @"÷", - @"eacute": @"é", - @"ecirc": @"ê", - @"egrave": @"è", - @"eth": @"ð", - @"euml": @"ë", - @"euro": @"€", - @"frac12": @"½", - @"frac14": @"¼", - @"frac34": @"¾", - @"gt": @">", - @"hearts": @"♥", - @"hellip": @"…", - @"iacute": @"í", - @"icirc": @"î", - @"iexcl": @"¡", - @"igrave": @"ì", - @"iquest": @"¿", - @"iuml": @"ï", - @"laquo": @"«", - @"ldquo": @"“", - @"lsquo": @"‘", - @"lt": @"<", - @"macr": @"¯", - @"mdash": @"—", - @"micro": @"µ", - @"middot": @"·", - @"ndash": @"–", - @"not": @"¬", - @"ntilde": @"ñ", - @"oacute": @"ó", - @"ocirc": @"ô", - @"ograve": @"ò", - @"ordf": @"ª", - @"ordm": @"º", - 
@"oslash": @"ø", - @"otilde": @"õ", - @"ouml": @"ö", - @"para": @"¶", - @"pi": @"π", - @"plusmn": @"±", - @"pound": @"£", - @"quot": @"\"", - @"raquo": @"»", - @"rdquo": @"”", - @"reg": @"®", - @"rsquo": @"’", - @"sect": @"§", - @"shy": RSParserStringWithValue(173), - @"sup1": @"¹", - @"sup2": @"²", - @"sup3": @"³", - @"szlig": @"ß", - @"thorn": @"þ", - @"times": @"×", - @"trade": @"™", - @"uacute": @"ú", - @"ucirc": @"û", - @"ugrave": @"ù", - @"uml": @"¨", - @"uuml": @"ü", - @"yacute": @"y", - @"yen": @"¥", - @"yuml": @"ÿ", - @"infin": @"∞", - @"nbsp": RSParserStringWithValue(160) - }; - }); - - return entitiesDictionary; -} diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h deleted file mode 100755 index 27b5d80e4..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h +++ /dev/null @@ -1,18 +0,0 @@ -// -// RSAtomParser.h -// RSParser -// -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class ParserData; -@class RSParsedFeed; - -@interface RSAtomParser : NSObject - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m deleted file mode 100755 index eaaeeb638..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m +++ /dev/null @@ -1,679 +0,0 @@ -// -// RSAtomParser.m -// RSParser -// -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. 
-// - - -#import "RSAtomParser.h" -#import "RSSAXParser.h" -#import "RSParsedFeed.h" -#import "RSParsedArticle.h" -#import "NSString+RSParser.h" -#import "RSDateParser.h" -#import "ParserData.h" -#import "RSParsedEnclosure.h" -#import "RSParsedAuthor.h" - -#import - -@interface RSAtomParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) BOOL endFeedFound; -@property (nonatomic) BOOL parsingXHTML; -@property (nonatomic) BOOL parsingSource; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic) BOOL parsingAuthor; -@property (nonatomic) NSMutableArray *attributesStack; -@property (nonatomic, readonly) NSDictionary *currentAttributes; -@property (nonatomic) NSMutableString *xhtmlString; -@property (nonatomic) NSString *link; -@property (nonatomic) NSString *title; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) NSDate *dateParsed; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic) RSParsedAuthor *currentAuthor; -@property (nonatomic, readonly) NSDate *currentDate; -@property (nonatomic) NSString *language; - -@end - - -@implementation RSAtomParser - -#pragma mark - Class Methods - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData { - - RSAtomParser *parser = [[[self class] alloc] initWithParserData:parserData]; - return [parser parseFeed]; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - _feedData = parserData.data; - _urlString = parserData.url; - _parser = [[RSSAXParser alloc] initWithDelegate:self]; - _attributesStack = [NSMutableArray new]; - _articles = [NSMutableArray new]; - - return self; -} - - -#pragma mark - API - -- (RSParsedFeed *)parseFeed { - - [self parse]; - - RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString 
title:self.title link:self.link language:self.language articles:self.articles]; - - return parsedFeed; -} - - -#pragma mark - Constants - -static NSString *kTypeKey = @"type"; -static NSString *kXHTMLType = @"xhtml"; -static NSString *kRelKey = @"rel"; -static NSString *kAlternateValue = @"alternate"; -static NSString *kHrefKey = @"href"; -static NSString *kXMLKey = @"xml"; -static NSString *kBaseKey = @"base"; -static NSString *kLangKey = @"lang"; -static NSString *kXMLBaseKey = @"xml:base"; -static NSString *kXMLLangKey = @"xml:lang"; -static NSString *kTextHTMLValue = @"text/html"; -static NSString *kRelatedValue = @"related"; -static NSString *kEnclosureValue = @"enclosure"; -static NSString *kShortURLValue = @"shorturl"; -static NSString *kHTMLValue = @"html"; -static NSString *kEnValue = @"en"; -static NSString *kTextValue = @"text"; -static NSString *kSelfValue = @"self"; -static NSString *kLengthKey = @"length"; -static NSString *kTitleKey = @"title"; - -static const char *kID = "id"; -static const NSInteger kIDLength = 3; - -static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kContent = "content"; -static const NSInteger kContentLength = 8; - -static const char *kSummary = "summary"; -static const NSInteger kSummaryLength = 8; - -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; - -static const char *kPublished = "published"; -static const NSInteger kPublishedLength = 10; - -static const char *kIssued = "issued"; -static const NSInteger kIssuedLength = 7; - -static const char *kUpdated = "updated"; -static const NSInteger kUpdatedLength = 8; - -static const char *kModified = "modified"; -static const NSInteger kModifiedLength = 9; - -static const char *kAuthor = "author"; -static const NSInteger kAuthorLength = 7; - -static const char *kName = "name"; -static const NSInteger kNameLength = 5; - -static const char *kEmail = "email"; -static const NSInteger kEmailLength = 6; - -static 
const char *kURI = "uri"; -static const NSInteger kURILength = 4; - -static const char *kEntry = "entry"; -static const NSInteger kEntryLength = 6; - -static const char *kSource = "source"; -static const NSInteger kSourceLength = 7; - -static const char *kFeed = "feed"; -static const NSInteger kFeedLength = 5; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kRel = "rel"; -static const NSInteger kRelLength = 4; - -static const char *kAlternate = "alternate"; -static const NSInteger kAlternateLength = 10; - -static const char *kHref = "href"; -static const NSInteger kHrefLength = 5; - -static const char *kXML = "xml"; -static const NSInteger kXMLLength = 4; - -static const char *kBase = "base"; -static const NSInteger kBaseLength = 5; - -static const char *kLang = "lang"; -static const NSInteger kLangLength = 5; - -static const char *kTextHTML = "text/html"; -static const NSInteger kTextHTMLLength = 10; - -static const char *kRelated = "related"; -static const NSInteger kRelatedLength = 8; - -static const char *kShortURL = "shorturl"; -static const NSInteger kShortURLLength = 9; - -static const char *kHTML = "html"; -static const NSInteger kHTMLLength = 5; - -static const char *kEn = "en"; -static const NSInteger kEnLength = 3; - -static const char *kText = "text"; -static const NSInteger kTextLength = 5; - -static const char *kSelf = "self"; -static const NSInteger kSelfLength = 5; - -static const char *kEnclosure = "enclosure"; -static const NSInteger kEnclosureLength = 10; - -static const char *kLength = "length"; -static const NSInteger kLengthLength = 7; - -#pragma mark - Parsing - -- (void)parse { - - self.dateParsed = [NSDate date]; - - @autoreleasepool { - [self.parser parseData:self.feedData]; - [self.parser finishParsing]; - } -} - - -- (void)addArticle { - - RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; - article.dateParsed = self.dateParsed; - - [self.articles 
addObject:article]; -} - - -- (RSParsedArticle *)currentArticle { - - return self.articles.lastObject; -} - - -- (NSDictionary *)currentAttributes { - - return self.attributesStack.lastObject; -} - - -- (NSDate *)currentDate { - - return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); -} - - -- (void)addFeedLink { - - if (self.link && self.link.length > 0) { - return; - } - - NSString *related = self.currentAttributes[kRelKey]; - if (related == kAlternateValue) { - self.link = self.currentAttributes[kHrefKey]; - } -} - - -- (void)addFeedTitle { - - if (self.title.length < 1) { - self.title = [self currentString]; - } -} - -- (void)addFeedLanguage { - - if (self.language.length < 0) { - self.language = self.currentAttributes[kXMLLangKey] -; - } -} - -- (void)addLink { - - NSDictionary *attributes = self.currentAttributes; - - NSString *urlString = attributes[kHrefKey]; - if (urlString.length < 1) { - return; - } - - RSParsedArticle *article = self.currentArticle; - - NSString *rel = attributes[kRelKey]; - if (rel.length < 1) { - rel = kAlternateValue; - } - - if (rel == kRelatedValue) { - if (!article.link) { - article.link = urlString; - } - } - else if (rel == kAlternateValue) { - if (!article.permalink) { - article.permalink = urlString; - } - } - else if (rel == kEnclosureValue) { - RSParsedEnclosure *enclosure = [self enclosureWithURLString:urlString attributes:attributes]; - [article addEnclosure:enclosure]; - } -} - -- (RSParsedEnclosure *)enclosureWithURLString:(NSString *)urlString attributes:(NSDictionary *)attributes { - - RSParsedEnclosure *enclosure = [[RSParsedEnclosure alloc] init]; - enclosure.url = urlString; - enclosure.title = attributes[kTitleKey]; - enclosure.mimeType = attributes[kTypeKey]; - enclosure.length = [attributes[kLengthKey] integerValue]; - - return enclosure; -} - -- (void)addContent { - - self.currentArticle.body = [self currentString]; -} - - -- (void)addSummary { - - if 
(!self.currentArticle.body) { - self.currentArticle.body = [self currentString]; - } -} - - -- (NSString *)currentString { - - return self.parser.currentStringWithTrimmedWhitespace; -} - - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix) { - return; - } - - if (RSSAXEqualTags(localName, kID, kIDLength)) { - self.currentArticle.guid = [self currentString]; - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.currentArticle.title = [self currentString]; - } - - else if (RSSAXEqualTags(localName, kContent, kContentLength)) { - [self addContent]; - } - - else if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) { - [self addSummary]; - } - - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addLink]; - } - - else if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) { - self.currentArticle.datePublished = self.currentDate; - } - - else if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) { - self.currentArticle.dateModified = self.currentDate; - } - - // Atom 0.3 dates - else if (RSSAXEqualTags(localName, kIssued, kIssuedLength)) { - if (!self.currentArticle.datePublished) { - self.currentArticle.datePublished = self.currentDate; - } - } - else if (RSSAXEqualTags(localName, kModified, kModifiedLength)) { - if (!self.currentArticle.dateModified) { - self.currentArticle.dateModified = self.currentDate; - } - } -} - - -- (void)addXHTMLTag:(const xmlChar *)localName { - - if (!localName) { - return; - } - - [self.xhtmlString appendString:@"<"]; - [self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]]; - - if (self.currentAttributes.count < 1) { - [self.xhtmlString appendString:@">"]; - return; - } - - for (NSString *oneKey in self.currentAttributes) { - - [self.xhtmlString appendString:@" "]; - - NSString *oneValue = self.currentAttributes[oneKey]; - [self.xhtmlString appendString:oneKey]; - - [self.xhtmlString appendString:@"=\""]; - 
- oneValue = [oneValue stringByReplacingOccurrencesOfString:@"\"" withString:@"""]; - [self.xhtmlString appendString:oneValue]; - - [self.xhtmlString appendString:@"\""]; - } - - [self.xhtmlString appendString:@">"]; -} - - -#pragma mark - RSSAXParserDelegate - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (self.endFeedFound) { - return; - } - - NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - if (!xmlAttributes) { - xmlAttributes = [NSDictionary dictionary]; - } - [self.attributesStack addObject:xmlAttributes]; - - if (self.parsingXHTML) { - [self addXHTMLTag:localName]; - return; - } - - if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = YES; - [self addArticle]; - return; - } - - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = YES; - self.currentAuthor = [[RSParsedAuthor alloc] init]; - return; - } - - if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = YES; - return; - } - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - if (self.parsingArticle && (isContentTag || isSummaryTag)) { - - if (isContentTag) { - self.currentArticle.language = xmlAttributes[kXMLLangKey]; - } - - NSString *contentType = xmlAttributes[kTypeKey]; - if ([contentType isEqualToString:kXHTMLType]) { - self.parsingXHTML = YES; - self.xhtmlString = [NSMutableString stringWithString:@""]; - return; - } - } - - if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addFeedLink]; - return; - } - - 
if (RSSAXEqualTags(localName, kFeed, kFeedLength)) { - [self addFeedLanguage]; - } - - [self.parser beginStoringCharacters]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (RSSAXEqualTags(localName, kFeed, kFeedLength)) { - self.endFeedFound = YES; - return; - } - - if (self.endFeedFound) { - return; - } - - if (self.parsingXHTML) { - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - - if (self.parsingArticle && (isContentTag || isSummaryTag)) { - - if (isContentTag) { - self.currentArticle.body = [self.xhtmlString copy]; - } - - else if (isSummaryTag) { - if (self.currentArticle.body.length < 1) { - self.currentArticle.body = [self.xhtmlString copy]; - } - } - } - - if (isContentTag || isSummaryTag) { - self.parsingXHTML = NO; - } - - [self.xhtmlString appendString:@""]; - } - - else if (self.parsingAuthor) { - - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = NO; - RSParsedAuthor *author = self.currentAuthor; - if (author.name || author.emailAddress || author.url) { - [self.currentArticle addAuthor:author]; - } - self.currentAuthor = nil; - } - else if (RSSAXEqualTags(localName, kName, kNameLength)) { - self.currentAuthor.name = [self currentString]; - } - else if (RSSAXEqualTags(localName, kEmail, kEmailLength)) { - self.currentAuthor.emailAddress = [self currentString]; - } - else if (RSSAXEqualTags(localName, kURI, kURILength)) { - self.currentAuthor.url = [self currentString]; - } - } - - else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle && !self.parsingSource) { - [self addArticleElement:localName prefix:prefix]; - } - - else if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = NO; - } - - else if (!self.parsingArticle 
&& !self.parsingSource && RSSAXEqualTags(localName, kTitle, kTitleLength)) { - [self addFeedTitle]; - } - - [self.attributesStack removeLastObject]; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (prefix && RSSAXEqualTags(prefix, kXML, kXMLLength)) { - - if (RSSAXEqualTags(name, kBase, kBaseLength)) { - return kXMLBaseKey; - } - if (RSSAXEqualTags(name, kLang, kLangLength)) { - return kXMLLangKey; - } - } - - if (prefix) { - return nil; - } - - if (RSSAXEqualTags(name, kRel, kRelLength)) { - return kRelKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - - if (RSSAXEqualTags(name, kHref, kHrefLength)) { - return kHrefKey; - } - - if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) { - return kAlternateValue; - } - - if (RSSAXEqualTags(name, kLength, kLengthLength)) { - return kLengthKey; - } - - if (RSSAXEqualTags(name, kTitle, kTitleLength)) { - return kTitleKey; - } - - return nil; -} - - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - - static const NSUInteger alternateLength = kAlternateLength - 1; - static const NSUInteger textHTMLLength = kTextHTMLLength - 1; - static const NSUInteger relatedLength = kRelatedLength - 1; - static const NSUInteger shortURLLength = kShortURLLength - 1; - static const NSUInteger htmlLength = kHTMLLength - 1; - static const NSUInteger enLength = kEnLength - 1; - static const NSUInteger textLength = kTextLength - 1; - static const NSUInteger selfLength = kSelfLength - 1; - static const NSUInteger enclosureLength = kEnclosureLength - 1; - - if (length == alternateLength && equalBytes(bytes, kAlternate, alternateLength)) { - return kAlternateValue; - } - - if (length == enclosureLength && 
equalBytes(bytes, kEnclosure, enclosureLength)) { - return kEnclosureValue; - } - - if (length == textHTMLLength && equalBytes(bytes, kTextHTML, textHTMLLength)) { - return kTextHTMLValue; - } - - if (length == relatedLength && equalBytes(bytes, kRelated, relatedLength)) { - return kRelatedValue; - } - - if (length == shortURLLength && equalBytes(bytes, kShortURL, shortURLLength)) { - return kShortURLValue; - } - - if (length == htmlLength && equalBytes(bytes, kHTML, htmlLength)) { - return kHTMLValue; - } - - if (length == enLength && equalBytes(bytes, kEn, enLength)) { - return kEnValue; - } - - if (length == textLength && equalBytes(bytes, kText, textLength)) { - return kTextValue; - } - - if (length == selfLength && equalBytes(bytes, kSelf, selfLength)) { - return kSelfValue; - } - - return nil; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length { - - if (self.parsingXHTML) { - NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)characters length:length encoding:NSUTF8StringEncoding freeWhenDone:NO]; - if (s == nil) { - return; - } - // libxml decodes all entities; we need to re-encode certain characters - // (<, >, and &) when inside XHTML text content. - [self.xhtmlString appendString:s.rsparser_stringByEncodingRequiredEntities]; - } -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h deleted file mode 100755 index 5c3745a32..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h +++ /dev/null @@ -1,22 +0,0 @@ -// -// RSDateParser.h -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -// Common web dates -- RFC 822 and 8601 -- are handled here: the formats you find in JSON and XML feeds. -// These may return nil. They may also return garbage, given bad input. 
- -NSDate *RSDateWithString(NSString *dateString); - -// If you're using a SAX parser, you have the bytes and don't need to convert to a string first. -// It's faster and uses less memory. -// (Assumes bytes are UTF-8 or ASCII. If you're using the libxml SAX parser, this will work.) - -NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes); - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m deleted file mode 100755 index cb9c572d3..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m +++ /dev/null @@ -1,461 +0,0 @@ -// -// RSDateParser.m -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSDateParser.h" -#import - - -typedef struct { - const char *abbreviation; - const NSInteger offsetHours; - const NSInteger offsetMinutes; -} RSTimeZoneAbbreviationAndOffset; - - -#define kNumberOfTimeZones 96 - -static const RSTimeZoneAbbreviationAndOffset timeZoneTable[kNumberOfTimeZones] = { - {"GMT", 0, 0}, //Most common at top, for performance - {"PDT", -7, 0}, {"PST", -8, 0}, {"EST", -5, 0}, {"EDT", -4, 0}, - {"MDT", -6, 0}, {"MST", -7, 0}, {"CST", -6, 0}, {"CDT", -5, 0}, - {"ACT", -8, 0}, {"AFT", 4, 30}, {"AMT", 4, 0}, {"ART", -3, 0}, - {"AST", 3, 0}, {"AZT", 4, 0}, {"BIT", -12, 0}, {"BDT", 8, 0}, - {"ACST", 9, 30}, {"AEST", 10, 0}, {"AKST", -9, 0}, {"AMST", 5, 0}, - {"AWST", 8, 0}, {"AZOST", -1, 0}, {"BIOT", 6, 0}, {"BRT", -3, 0}, - {"BST", 6, 0}, {"BTT", 6, 0}, {"CAT", 2, 0}, {"CCT", 6, 30}, - {"CET", 1, 0}, {"CEST", 2, 0}, {"CHAST", 12, 45}, {"ChST", 10, 0}, - {"CIST", -8, 0}, {"CKT", -10, 0}, {"CLT", -4, 0}, {"CLST", -3, 0}, - {"COT", -5, 0}, {"COST", -4, 0}, {"CVT", -1, 0}, {"CXT", 7, 0}, - {"EAST", -6, 0}, {"EAT", 3, 0}, {"ECT", -4, 0}, {"EEST", 3, 0}, - {"EET", 2, 0}, {"FJT", 12, 0}, {"FKST", -4, 0}, {"GALT", -6, 0}, - {"GET", 4, 0}, {"GFT", -3, 0}, {"GILT", 7, 0}, {"GIT", -9, 
0}, - {"GST", -2, 0}, {"GYT", -4, 0}, {"HAST", -10, 0}, {"HKT", 8, 0}, - {"HMT", 5, 0}, {"IRKT", 8, 0}, {"IRST", 3, 30}, {"IST", 2, 0}, - {"JST", 9, 0}, {"KRAT", 7, 0}, {"KST", 9, 0}, {"LHST", 10, 30}, - {"LINT", 14, 0}, {"MAGT", 11, 0}, {"MIT", -9, 30}, {"MSK", 3, 0}, - {"MUT", 4, 0}, {"NDT", -2, 30}, {"NFT", 11, 30}, {"NPT", 5, 45}, - {"NT", -3, 30}, {"OMST", 6, 0}, {"PETT", 12, 0}, {"PHOT", 13, 0}, - {"PKT", 5, 0}, {"RET", 4, 0}, {"SAMT", 4, 0}, {"SAST", 2, 0}, - {"SBT", 11, 0}, {"SCT", 4, 0}, {"SLT", 5, 30}, {"SST", 8, 0}, - {"TAHT", -10, 0}, {"THA", 7, 0}, {"UYT", -3, 0}, {"UYST", -2, 0}, - {"VET", -4, 30}, {"VLAT", 10, 0}, {"WAT", 1, 0}, {"WET", 0, 0}, - {"WEST", 1, 0}, {"YAKT", 9, 0}, {"YEKT", 5, 0} -}; /*See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list*/ - - - -#pragma mark - Parser - -enum { - RSJanuary = 1, - RSFebruary, - RSMarch, - RSApril, - RSMay, - RSJune, - RSJuly, - RSAugust, - RSSeptember, - RSOctober, - RSNovember, - RSDecember -}; - -static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger *finalIndex) { - - /*Months are 1-based -- January is 1, Dec is 12. - Lots of short-circuits here. Not strict. 
GIGO.*/ - - NSUInteger i;// = startingIndex; - NSUInteger numberOfAlphaCharactersFound = 0; - char monthCharacters[3] = {0, 0, 0}; - - for (i = startingIndex; i < numberOfBytes; i++) { - - *finalIndex = i; - char character = bytes[i]; - - BOOL isAlphaCharacter = (BOOL)isalpha(character); - if (!isAlphaCharacter && numberOfAlphaCharactersFound < 1) - continue; - if (!isAlphaCharacter && numberOfAlphaCharactersFound > 0) - break; - - numberOfAlphaCharactersFound++; - if (numberOfAlphaCharactersFound == 1) { - if (character == 'F' || character == 'f') - return RSFebruary; - if (character == 'S' || character == 's') - return RSSeptember; - if (character == 'O' || character == 'o') - return RSOctober; - if (character == 'N' || character == 'n') - return RSNovember; - if (character == 'D' || character == 'd') - return RSDecember; - } - - monthCharacters[numberOfAlphaCharactersFound - 1] = character; - if (numberOfAlphaCharactersFound >=3) - break; - } - - if (numberOfAlphaCharactersFound < 2) - return NSNotFound; - - if (monthCharacters[0] == 'J' || monthCharacters[0] == 'j') { //Jan, Jun, Jul - if (monthCharacters[1] == 'a' || monthCharacters[1] == 'A') - return RSJanuary; - if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') { - if (monthCharacters[2] == 'n' || monthCharacters[2] == 'N') - return RSJune; - return RSJuly; - } - return RSJanuary; - } - - if (monthCharacters[0] == 'M' || monthCharacters[0] == 'm') { //March, May - if (monthCharacters[2] == 'y' || monthCharacters[2] == 'Y') - return RSMay; - return RSMarch; - } - - if (monthCharacters[0] == 'A' || monthCharacters[0] == 'a') { //April, August - if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') - return RSAugust; - return RSApril; - } - - return RSJanuary; //should never get here -} - - -static NSInteger nextNumericValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger maximumNumberOfDigits, NSUInteger *finalIndex) { - - /*maximumNumberOfDigits has a maximum 
limit of 4 (for time zone offsets and years). - *finalIndex will be the index of the last character looked at.*/ - - if (maximumNumberOfDigits > 4) - maximumNumberOfDigits = 4; - - NSUInteger i = 0; - NSUInteger numberOfDigitsFound = 0; - NSInteger digits[4] = {0, 0, 0, 0}; - - for (i = startingIndex; i < numberOfBytes; i++) { - *finalIndex = i; - BOOL isDigit = (BOOL)isdigit(bytes[i]); - if (!isDigit && numberOfDigitsFound < 1) - continue; - if (!isDigit && numberOfDigitsFound > 0) - break; - digits[numberOfDigitsFound] = bytes[i] - 48; // '0' is 48 - numberOfDigitsFound++; - if (numberOfDigitsFound >= maximumNumberOfDigits) - break; - } - - if (numberOfDigitsFound < 1) - return NSNotFound; - if (numberOfDigitsFound == 1) - return digits[0]; - if (numberOfDigitsFound == 2) - return (digits[0] * 10) + digits[1]; - if (numberOfDigitsFound == 3) - return (digits[0] * 100) + (digits[1] * 10) + digits[2]; - return (digits[0] * 1000) + (digits[1] * 100) + (digits[2] * 10) + digits[3]; -} - - -static BOOL hasAtLeastOneAlphaCharacter(const char *s) { - - NSUInteger length = strlen(s); - NSUInteger i = 0; - - for (i = 0; i < length; i++) { - if (isalpha(s[i])) - return YES; - } - - return NO; -} - - -#pragma mark - Time Zones and offsets - -static NSInteger offsetInSecondsForTimeZoneAbbreviation(const char *abbreviation) { - - /*Linear search should be fine. It's a C array, and short (under 100 items). - Most common time zones are at the beginning of the array. 
(We can tweak this as needed.)*/ - - NSUInteger i; - - for (i = 0; i < kNumberOfTimeZones; i++) { - - RSTimeZoneAbbreviationAndOffset zone = timeZoneTable[i]; - if (strcmp(abbreviation, zone.abbreviation) == 0) { - if (zone.offsetHours < 0) - return (zone.offsetHours * 60 * 60) - (zone.offsetMinutes * 60); - return (zone.offsetHours * 60 * 60) + (zone.offsetMinutes * 60); - } - } - - return 0; -} - - -static NSInteger offsetInSecondsForOffsetCharacters(const char *timeZoneCharacters) { - - BOOL isPlus = timeZoneCharacters[0] == '+'; - NSUInteger finalIndex = 0; - NSInteger hours = nextNumericValue(timeZoneCharacters, strlen(timeZoneCharacters), 0, 2, &finalIndex); - NSInteger minutes = nextNumericValue(timeZoneCharacters, strlen(timeZoneCharacters), finalIndex + 1, 2, &finalIndex); - - if (hours == NSNotFound) - hours = 0; - if (minutes == NSNotFound) - minutes = 0; - if (hours == 0 && minutes == 0) - return 0; - - NSInteger seconds = (hours * 60 * 60) + (minutes * 60); - if (!isPlus) - seconds = 0 - seconds; - return seconds; -} - - -static const char *rs_GMT = "GMT"; -static const char *rs_UTC = "UTC"; - -static NSInteger parsedTimeZoneOffset(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex) { - - /*Examples: GMT Z +0000 -0000 +07:00 -0700 PDT EST - Parse into char[5] -- drop any colon characters. If numeric, calculate seconds from GMT. 
- If alpha, special-case GMT and Z, otherwise look up in time zone list to get offset.*/ - - char timeZoneCharacters[6] = {0, 0, 0, 0, 0, 0}; //nil-terminated last character - NSUInteger i = 0; - NSUInteger numberOfCharactersFound = 0; - - for (i = startingIndex; i < numberOfBytes; i++) { - char ch = bytes[i]; - if (ch == ':' || ch == ' ') - continue; - if (isdigit(ch) || isalpha(ch) || ch == '+' || ch == '-') { - numberOfCharactersFound++; - timeZoneCharacters[numberOfCharactersFound - 1] = ch; - } - if (numberOfCharactersFound >= 5) - break; - } - - if (numberOfCharactersFound < 1 || timeZoneCharacters[0] == 'Z' || timeZoneCharacters[0] == 'z') - return 0; - if (strcasestr(timeZoneCharacters, rs_GMT) != nil || strcasestr(timeZoneCharacters, rs_UTC)) - return 0; - - if (hasAtLeastOneAlphaCharacter(timeZoneCharacters)) - return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters); - return offsetInSecondsForOffsetCharacters(timeZoneCharacters); -} - - -#pragma mark - Date Creation - -static NSDate *dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(NSInteger year, NSInteger month, NSInteger day, NSInteger hour, NSInteger minute, NSInteger second, NSInteger milliseconds, NSInteger timeZoneOffset) { - - struct tm timeInfo; - timeInfo.tm_sec = (int)second; - timeInfo.tm_min = (int)minute; - timeInfo.tm_hour = (int)hour; - timeInfo.tm_mday = (int)day; - timeInfo.tm_mon = (int)(month - 1); //It's 1-based coming in - timeInfo.tm_year = (int)(year - 1900); //see time.h -- it's years since 1900 - timeInfo.tm_wday = -1; - timeInfo.tm_yday = -1; - timeInfo.tm_isdst = -1; - timeInfo.tm_gmtoff = 0;//[timeZone secondsFromGMT]; - timeInfo.tm_zone = nil; - - NSTimeInterval rawTime = (NSTimeInterval)(timegm(&timeInfo) - timeZoneOffset); //timegm instead of mktime (which uses local time zone) - if (rawTime == (time_t)ULONG_MAX) { - - /*NSCalendar is super-amazingly-slow (which is partly why RSDateParser exists), so this is used only when the date is far enough in the 
future (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. If profiling says that this is a performance issue, then you've got a weird app that needs to work with dates far in the future.*/ - - NSDateComponents *dateComponents = [NSDateComponents new]; - - dateComponents.timeZone = [NSTimeZone timeZoneForSecondsFromGMT:timeZoneOffset]; - dateComponents.year = year; - dateComponents.month = month; - dateComponents.day = day; - dateComponents.hour = hour; - dateComponents.minute = minute; - dateComponents.second = second + (milliseconds / 1000); - - return [[NSCalendar autoupdatingCurrentCalendar] dateFromComponents:dateComponents]; - } - - if (milliseconds > 0) { - rawTime += ((float)milliseconds / 1000.0f); - } - - return [NSDate dateWithTimeIntervalSince1970:rawTime]; -} - - -#pragma mark - Standard Formats - -static NSDate *RSParsePubDateWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - /*@"EEE',' dd MMM yyyy HH':'mm':'ss ZZZ" - @"EEE, dd MMM yyyy HH:mm:ss zzz" - @"dd MMM yyyy HH:mm zzz" - @"dd MMM yyyy HH:mm ZZZ" - @"EEE, dd MMM yyyy" - @"EEE, dd MMM yyyy HH:mm zzz" - etc.*/ - - NSUInteger finalIndex = 0; - NSInteger day = 1; - NSInteger month = RSJanuary; - NSInteger year = 1970; - NSInteger hour = 0; - NSInteger minute = 0; - NSInteger second = 0; - NSInteger timeZoneOffset = 0; - - day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex); - if (day < 1 || day == NSNotFound) - day = 1; - - month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex); - year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex); - hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - if (hour == NSNotFound) - hour = 0; - - minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - if (minute == NSNotFound) - minute = 0; - - NSUInteger currentIndex = finalIndex + 1; - - BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':'); - if 
(hasSeconds) - second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex); - - currentIndex = finalIndex + 1; - BOOL hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ' '); - if (hasTimeZone) - timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex); - - return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset); -} - - -static NSDate *RSParseW3CWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss" - @"yyyy-MM-dd'T'HH:mm:sszzz" - @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz" - etc.*/ - - NSUInteger finalIndex = 0; - NSInteger day = 1; - NSInteger month = RSJanuary; - NSInteger year = 1970; - NSInteger hour = 0; - NSInteger minute = 0; - NSInteger second = 0; - NSInteger milliseconds = 0; - NSInteger timeZoneOffset = 0; - - year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex); - month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - - NSUInteger currentIndex = finalIndex + 1; - BOOL hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == '.'); - if (hasMilliseconds) { - milliseconds = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex); - currentIndex = finalIndex + 1; - } - - timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex); - - return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset); -} - - -static BOOL dateIsPubDate(const char *bytes, NSUInteger numberOfBytes) { - - NSUInteger i = 0; - - for (i = 0; i < numberOfBytes; i++) 
{ - if (bytes[i] == ' ' || bytes[i] == ',') - return YES; - } - - return NO; -} - - -static BOOL dateIsW3CDate(const char *bytes, NSUInteger numberOfBytes) { - - // Something like 2010-11-17T08:40:07-05:00 - // But might be missing T character in the middle. - // Looks for four digits in a row followed by a -. - - for (NSUInteger i = 0; i < numberOfBytes; i++) { - char ch = bytes[i]; - if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') { - continue; - } - if (numberOfBytes - i < 5) { - return NO; - } - return isdigit(ch) && isdigit(bytes[i + 1]) && isdigit(bytes[i + 2]) && isdigit(bytes[i + 3]) && bytes[i + 4] == '-'; - } - - return NO; -} - -static BOOL numberOfBytesIsOutsideReasonableRange(NSUInteger numberOfBytes) { - return numberOfBytes < 6 || numberOfBytes > 150; -} - - -#pragma mark - API - -NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - if (numberOfBytesIsOutsideReasonableRange(numberOfBytes)) - return nil; - - if (dateIsW3CDate(bytes, numberOfBytes)) { - return RSParseW3CWithBytes(bytes, numberOfBytes); - } - if (dateIsPubDate(bytes, numberOfBytes)) - return RSParsePubDateWithBytes(bytes, numberOfBytes); - - // Fallback, in case our detection fails. - return RSParseW3CWithBytes(bytes, numberOfBytes); -} - - -NSDate *RSDateWithString(NSString *dateString) { - - const char *utf8String = [dateString UTF8String]; - return RSDateWithBytes(utf8String, strlen(utf8String)); -} - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h deleted file mode 100755 index 67c7f9f6c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h +++ /dev/null @@ -1,35 +0,0 @@ -// -// RSHTMLLinkParser.h -// RSParser -// -// Created by Brent Simmons on 8/7/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -/*Returns all some_text as RSHTMLLink object array.*/ - -@class ParserData; -@class RSHTMLLink; - -@interface RSHTMLLinkParser : NSObject - -+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData; - -@end - - -@interface RSHTMLLink : NSObject - -// Any of these, even urlString, may be nil, because HTML can be bad. - -@property (nonatomic, nullable, readonly) NSString *urlString; //absolute -@property (nonatomic, nullable, readonly) NSString *text; -@property (nonatomic, nullable, readonly) NSString *title; //title attribute inside anchor tag - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m deleted file mode 100755 index 624e33569..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m +++ /dev/null @@ -1,154 +0,0 @@ -// -// RSHTMLLinkParser.m -// RSParser -// -// Created by Brent Simmons on 8/7/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - - -#import "RSHTMLLinkParser.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" -#import "ParserData.h" - -#import - - - -@interface RSHTMLLinkParser() - -@property (nonatomic, readonly) NSMutableArray *links; -@property (nonatomic, readonly) ParserData *parserData; -@property (nonatomic, readonly) NSMutableArray *dictionaries; -@property (nonatomic, readonly) NSURL *baseURL; - -@end - - -@interface RSHTMLLink() - -@property (nonatomic, readwrite) NSString *urlString; //absolute -@property (nonatomic, readwrite) NSString *text; -@property (nonatomic, readwrite) NSString *title; //title attribute inside anchor tag - -@end - - -@implementation RSHTMLLinkParser - - -#pragma mark - Class Methods - -+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData { - - RSHTMLLinkParser *parser = [[self alloc] initWithParserData:parserData]; - return parser.links; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - NSParameterAssert(parserData.data); - NSParameterAssert(parserData.url); - - self = [super init]; - if (!self) { - return nil; - } - - _links = [NSMutableArray new]; - _parserData = parserData; - _dictionaries = [NSMutableArray new]; - _baseURL = [NSURL URLWithString:parserData.url]; - - [self parse]; - - return self; -} - - -#pragma mark - Parse - -- (void)parse { - - RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self]; - [parser parseData:self.parserData.data]; - [parser finishParsing]; -} - - -- (RSHTMLLink *)currentLink { - - return self.links.lastObject; -} - - -static NSString *kHrefKey = @"href"; - -- (NSString *)urlStringFromDictionary:(NSDictionary *)d { - - NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (!href) { - return nil; - } - - NSURL *absoluteURL = [NSURL URLWithString:href relativeToURL:self.baseURL]; - return absoluteURL.absoluteString; -} - - -static NSString *kTitleKey = @"title"; - -- (NSString 
*)titleFromDictionary:(NSDictionary *)d { - - return [d rsparser_objectForCaseInsensitiveKey:kTitleKey]; -} - - -- (void)handleLinkAttributes:(NSDictionary *)d { - - RSHTMLLink *link = self.currentLink; - link.urlString = [self urlStringFromDictionary:d]; - link.title = [self titleFromDictionary:d]; -} - - -static const char *kAnchor = "a"; -static const NSInteger kAnchorLength = 2; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) { - return; - } - - RSHTMLLink *link = [RSHTMLLink new]; - [self.links addObject:link]; - - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSParserObjectIsEmpty(d)) { - [self handleLinkAttributes:d]; - } - - [SAXParser beginStoringCharacters]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName { - - if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) { - return; - } - - self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace; -} - -@end - -@implementation RSHTMLLink - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h deleted file mode 100755 index 0010740a1..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h +++ /dev/null @@ -1,98 +0,0 @@ -// -// RSHTMLMetadata.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; -@import CoreGraphics; - -@class RSHTMLMetadataFeedLink; -@class RSHTMLMetadataAppleTouchIcon; -@class RSHTMLMetadataFavicon; -@class RSHTMLOpenGraphProperties; -@class RSHTMLOpenGraphImage; -@class RSHTMLTag; -@class RSHTMLTwitterProperties; - -NS_ASSUME_NONNULL_BEGIN - -__attribute__((swift_attr("@Sendable"))) -@interface RSHTMLMetadata : NSObject - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, readonly) NSString *baseURLString; -@property (nonatomic, readonly) NSArray *tags; - -@property (nonatomic, readonly) NSArray *faviconLinks DEPRECATED_MSG_ATTRIBUTE("Use the favicons property instead."); -@property (nonatomic, readonly) NSArray *favicons; -@property (nonatomic, readonly) NSArray *appleTouchIcons; -@property (nonatomic, readonly) NSArray *feedLinks; - -@property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties; -@property (nonatomic, readonly) RSHTMLTwitterProperties *twitterProperties; - -@end - - -@interface RSHTMLMetadataAppleTouchIcon : NSObject - -@property (nonatomic, readonly) NSString *rel; -@property (nonatomic, nullable, readonly) NSString *sizes; -@property (nonatomic, readonly) CGSize size; -@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute. - -@end - - -@interface RSHTMLMetadataFeedLink : NSObject - -@property (nonatomic, nullable, readonly) NSString *title; -@property (nonatomic, nullable, readonly) NSString *type; -@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute. - -@end - -@interface RSHTMLMetadataFavicon : NSObject - -@property (nonatomic, nullable, readonly) NSString *type; -@property (nonatomic, nullable, readonly) NSString *urlString; - -@end - -@interface RSHTMLOpenGraphProperties : NSObject - -// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image. 
-// See http://ogp.me/ - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, readonly) NSArray *images; - -@end - -@interface RSHTMLOpenGraphImage : NSObject - -@property (nonatomic, nullable, readonly) NSString *url; -@property (nonatomic, nullable, readonly) NSString *secureURL; -@property (nonatomic, nullable, readonly) NSString *mimeType; -@property (nonatomic, readonly) CGFloat width; -@property (nonatomic, readonly) CGFloat height; -@property (nonatomic, nullable, readonly) NSString *altText; - -@end - -@interface RSHTMLTwitterProperties : NSObject - -// TODO: the rest. At this writing (Nov. 26, 2017) I just care about twitter:image:src. - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, nullable, readonly) NSString *imageURL; // twitter:image:src - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m deleted file mode 100755 index 2def0b078..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m +++ /dev/null @@ -1,483 +0,0 @@ -// -// RSHTMLMetadata.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSHTMLMetadata.h" -#import "RSParserInternal.h" -#import "RSHTMLTag.h" - - - -static NSString *urlStringFromDictionary(NSDictionary *d); -static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString); -static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString); -static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString); -static NSString *relValue(NSDictionary *d); -static BOOL typeIsFeedType(NSString *type); - -static NSString *kIconRelValue = @"icon"; -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kAppleTouchIconValue = @"apple-touch-icon"; -static NSString *kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed"; -static NSString *kSizesKey = @"sizes"; -static NSString *kTitleKey = @"title"; -static NSString *kRelKey = @"rel"; -static NSString *kAlternateKey = @"alternate"; -static NSString *kRSSSuffix = @"/rss+xml"; -static NSString *kAtomSuffix = @"/atom+xml"; -static NSString *kJSONSuffix = @"/json"; -static NSString *kTypeKey = @"type"; - -@interface RSHTMLMetadataAppleTouchIcon () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - - -@interface RSHTMLMetadataFeedLink () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - -@interface RSHTMLMetadataFavicon () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - -@implementation RSHTMLMetadata - -#pragma mark - Init - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - _baseURLString = urlString; - _tags = tags; - - _favicons = [self resolvedFaviconLinks]; - - NSArray *appleTouchIconTags = [self appleTouchIconTags]; - _appleTouchIcons = objectsOfClassWithTags([RSHTMLMetadataAppleTouchIcon class], 
appleTouchIconTags, urlString); - - NSArray *feedLinkTags = [self feedLinkTags]; - _feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], feedLinkTags, urlString); - - _openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags]; - _twitterProperties = [[RSHTMLTwitterProperties alloc] initWithURLString:urlString tags:tags]; - - return self; -} - -#pragma mark - Private - -- (NSArray *)linkTagsWithMatchingRel:(NSString *)valueToMatch { - - // Case-insensitive; matches a whitespace-delimited word - - NSMutableArray *tags = [NSMutableArray array]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink || RSParserStringIsEmpty(urlStringFromDictionary(tag.attributes))) { - continue; - } - NSString *oneRelValue = relValue(tag.attributes); - if (oneRelValue) { - NSArray *relValues = [oneRelValue componentsSeparatedByCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet]; - - for (NSString *relValue in relValues) { - if ([relValue compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) { - [tags addObject:tag]; - break; - } - } - } - } - - return tags; -} - - -- (NSArray *)appleTouchIconTags { - - NSMutableArray *tags = [NSMutableArray new]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink) { - continue; - } - NSString *oneRelValue = relValue(tag.attributes).lowercaseString; - if ([oneRelValue isEqualToString:kAppleTouchIconValue] || [oneRelValue isEqualToString:kAppleTouchIconPrecomposedValue]) { - [tags addObject:tag]; - } - } - - return tags; -} - - -- (NSArray *)feedLinkTags { - - NSMutableArray *tags = [NSMutableArray new]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink) { - continue; - } - - NSDictionary *oneDictionary = tag.attributes; - NSString *oneRelValue = relValue(oneDictionary).lowercaseString; - if (![oneRelValue isEqualToString:kAlternateKey]) { - continue; - } - - NSString *oneType = [oneDictionary 
rsparser_objectForCaseInsensitiveKey:kTypeKey]; - if (!typeIsFeedType(oneType)) { - continue; - } - - if (RSParserStringIsEmpty(urlStringFromDictionary(oneDictionary))) { - continue; - } - - [tags addObject:tag]; - } - - return tags; -} - -- (NSArray *)faviconLinks { - NSMutableArray *urls = [NSMutableArray array]; - - for (RSHTMLMetadataFavicon *favicon in self.favicons) { - [urls addObject:favicon.urlString]; - } - - return urls; -} - -- (NSArray *)resolvedFaviconLinks { - NSArray *tags = [self linkTagsWithMatchingRel:kIconRelValue]; - NSMutableArray *links = [NSMutableArray array]; - NSMutableSet *seenHrefs = [NSMutableSet setWithCapacity:tags.count]; - - for (RSHTMLTag *tag in tags) { - RSHTMLMetadataFavicon *link = [[RSHTMLMetadataFavicon alloc] initWithTag:tag baseURLString:self.baseURLString]; - NSString *urlString = link.urlString; - if (urlString == nil) { - continue; - } - if (![seenHrefs containsObject:urlString]) { - [links addObject:link]; - [seenHrefs addObject:urlString]; - } - } - - return links; -} - -@end - - -static NSString *relValue(NSDictionary *d) { - - return [d rsparser_objectForCaseInsensitiveKey:kRelKey]; -} - - -static NSString *urlStringFromDictionary(NSDictionary *d) { - - NSString *urlString = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (urlString) { - return urlString; - } - - return [d rsparser_objectForCaseInsensitiveKey:kSrcKey]; -} - - -static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) { - - NSURL *url = [NSURL URLWithString:baseURLString]; - if (!url) { - return nil; - } - - NSURL *absoluteURL = [NSURL URLWithString:relativeURLString relativeToURL:url]; - return absoluteURL.absoluteURL.standardizedURL.absoluteString; -} - - -static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) { - - NSString *urlString = urlStringFromDictionary(d); - if (RSParserStringIsEmpty(urlString)) { - return nil; - } - return 
absoluteURLStringWithRelativeURLString(urlString, baseURLString); -} - - -static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString) { - - NSMutableArray *objects = [NSMutableArray new]; - - for (RSHTMLTag *tag in tags) { - - id oneObject = [[class alloc] initWithTag:tag baseURLString:baseURLString]; - if (oneObject) { - [objects addObject:oneObject]; - } - } - - return objects; -} - - -static BOOL typeIsFeedType(NSString *type) { - - type = type.lowercaseString; - return [type hasSuffix:kRSSSuffix] || [type hasSuffix:kAtomSuffix] || [type hasSuffix:kJSONSuffix]; -} - - -@implementation RSHTMLMetadataAppleTouchIcon - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _sizes = [d rsparser_objectForCaseInsensitiveKey:kSizesKey]; - _rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey]; - - _size = CGSizeZero; - if (_sizes) { - NSArray *components = [_sizes componentsSeparatedByString:@"x"]; - if (components.count == 2) { - CGFloat width = [components[0] floatValue]; - CGFloat height = [components[1] floatValue]; - _size = CGSizeMake(width, height); - } - } - - return self; -} - -@end - - -@implementation RSHTMLMetadataFeedLink - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _title = [d rsparser_objectForCaseInsensitiveKey:kTitleKey]; - _type = [d rsparser_objectForCaseInsensitiveKey:kTypeKey]; - - return self; -} - -@end - -@implementation RSHTMLMetadataFavicon - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = 
tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _type = [d rsparser_objectForCaseInsensitiveKey:kTypeKey]; - - return self; -} - -@end - -@interface RSHTMLOpenGraphImage () - -@property (nonatomic, readwrite) NSString *url; -@property (nonatomic, readwrite) NSString *secureURL; -@property (nonatomic, readwrite) NSString *mimeType; -@property (nonatomic, readwrite) CGFloat width; -@property (nonatomic, readwrite) CGFloat height; -@property (nonatomic, readwrite) NSString *altText; - -@end - -@implementation RSHTMLOpenGraphImage - - -@end - -@interface RSHTMLOpenGraphProperties () - -@property (nonatomic) NSMutableArray *ogImages; -@end - -@implementation RSHTMLOpenGraphProperties - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - _ogImages = [NSMutableArray new]; - - [self parseTags:tags]; - return self; -} - - -- (RSHTMLOpenGraphImage *)currentImage { - - return self.ogImages.lastObject; -} - - -- (RSHTMLOpenGraphImage *)pushImage { - - RSHTMLOpenGraphImage *image = [RSHTMLOpenGraphImage new]; - [self.ogImages addObject:image]; - return image; -} - -- (RSHTMLOpenGraphImage *)ensureImage { - - RSHTMLOpenGraphImage *image = [self currentImage]; - if (image != nil) { - return image; - } - return [self pushImage]; -} - - -- (NSArray *)images { - - return self.ogImages; -} - -static NSString *ogPrefix = @"og:"; -static NSString *ogImage = @"og:image"; -static NSString *ogImageURL = @"og:image:url"; -static NSString *ogImageSecureURL = @"og:image:secure_url"; -static NSString *ogImageType = @"og:image:type"; -static NSString *ogImageWidth = @"og:image:width"; -static NSString *ogImageHeight = @"og:image:height"; -static NSString *ogImageAlt = @"og:image:alt"; -static NSString *ogPropertyKey = @"property"; -static NSString *ogContentKey = @"content"; - -- (void)parseTags:(NSArray *)tags { - - for (RSHTMLTag *tag in tags) { - - if (tag.type != 
RSHTMLTagTypeMeta) { - continue; - } - - NSString *propertyName = tag.attributes[ogPropertyKey]; - if (!propertyName || ![propertyName hasPrefix:ogPrefix]) { - continue; - } - NSString *content = tag.attributes[ogContentKey]; - if (!content) { - continue; - } - - if ([propertyName isEqualToString:ogImage]) { - RSHTMLOpenGraphImage *image = [self currentImage]; - if (!image || image.url) { // Most likely case, since og:image will probably appear before other image attributes. - image = [self pushImage]; - } - image.url = content; - } - - else if ([propertyName isEqualToString:ogImageURL]) { - [self ensureImage].url = content; - } - else if ([propertyName isEqualToString:ogImageSecureURL]) { - [self ensureImage].secureURL = content; - } - else if ([propertyName isEqualToString:ogImageType]) { - [self ensureImage].mimeType = content; - } - else if ([propertyName isEqualToString:ogImageAlt]) { - [self ensureImage].altText = content; - } - else if ([propertyName isEqualToString:ogImageWidth]) { - [self ensureImage].width = [content floatValue]; - } - else if ([propertyName isEqualToString:ogImageHeight]) { - [self ensureImage].height = [content floatValue]; - } - } -} - -@end - -@implementation RSHTMLTwitterProperties - -static NSString *twitterNameKey = @"name"; -static NSString *twitterContentKey = @"content"; -static NSString *twitterImageSrc = @"twitter:image:src"; - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - for (RSHTMLTag *tag in tags) { - - if (tag.type != RSHTMLTagTypeMeta) { - continue; - } - NSString *name = tag.attributes[twitterNameKey]; - if (!name || ![name isEqualToString:twitterImageSrc]) { - continue; - } - NSString *content = tag.attributes[twitterContentKey]; - if (!content || content.length < 1) { - continue; - } - _imageURL = content; - break; - } - - return self; -} - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h 
b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h deleted file mode 100755 index f9361905c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSHTMLMetadataParser.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@class RSHTMLMetadata; -@class ParserData; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSHTMLMetadataParser : NSObject - -+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData; - - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m deleted file mode 100755 index 254fd109d..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m +++ /dev/null @@ -1,151 +0,0 @@ -// -// RSHTMLMetadataParser.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -#import "RSHTMLMetadataParser.h" -#import "RSHTMLMetadata.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" -#import "ParserData.h" -#import "RSHTMLTag.h" - -#import - - -@interface RSHTMLMetadataParser () - -@property (nonatomic, readonly) ParserData *parserData; -@property (nonatomic, readwrite) RSHTMLMetadata *metadata; -@property (nonatomic) NSMutableArray *tags; -@property (nonatomic) BOOL didFinishParsing; -@property (nonatomic) BOOL shouldScanPastHeadSection; - -@end - - -@implementation RSHTMLMetadataParser - - -#pragma mark - Class Methods - -+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData { - - RSHTMLMetadataParser *parser = [[self alloc] initWithParserData:parserData]; - return parser.metadata; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - NSParameterAssert(parserData.data); - NSParameterAssert(parserData.url); - - self = [super init]; - if (!self) { - return nil; - } - - _parserData = parserData; - _tags = [NSMutableArray new]; - - // YouTube has a weird bug where, on some pages, it puts the feed link tag after the head section, in the body section. - // This allows for a special case where we continue to scan after the head section. - // (Yes, this match could yield false positives, but it’s harmless.) 
- _shouldScanPastHeadSection = [parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch].location != NSNotFound; - - [self parse]; - - return self; -} - - -#pragma mark - Parse - -- (void)parse { - - RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self]; - [parser parseData:self.parserData.data]; - [parser finishParsing]; - - self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.parserData.url tags:self.tags]; -} - - -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kRelKey = @"rel"; - -- (NSString *)linkForDictionary:(NSDictionary *)d { - - NSString *link = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (link) { - return link; - } - - return [d rsparser_objectForCaseInsensitiveKey:kSrcKey]; -} - -- (void)handleLinkAttributes:(NSDictionary *)d { - - if (RSParserStringIsEmpty([d rsparser_objectForCaseInsensitiveKey:kRelKey])) { - return; - } - if (RSParserStringIsEmpty([self linkForDictionary:d])) { - return; - } - - RSHTMLTag *tag = [RSHTMLTag linkTagWithAttributes:d]; - [self.tags addObject:tag]; -} - -- (void)handleMetaAttributes:(NSDictionary *)d { - - RSHTMLTag *tag = [RSHTMLTag metaTagWithAttributes:d]; - [self.tags addObject:tag]; -} - -#pragma mark - RSSAXHTMLParserDelegate - -static const char *kBody = "body"; -static const NSInteger kBodyLength = 5; -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; -static const char *kMeta = "meta"; -static const NSInteger kMetaLength = 5; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - if (self.didFinishParsing) { - return; - } - - if (RSSAXEqualTags(localName, kBody, kBodyLength) && !self.shouldScanPastHeadSection) { - self.didFinishParsing = YES; - return; - } - - if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if 
(!RSParserObjectIsEmpty(d)) { - [self handleLinkAttributes:d]; - } - return; - } - - if (RSSAXEqualTags(localName, kMeta, kMetaLength)) { - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSParserObjectIsEmpty(d)) { - [self handleMetaAttributes:d]; - } - } -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h deleted file mode 100755 index a2bfb3175..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h +++ /dev/null @@ -1,37 +0,0 @@ -// -// RSParsedArticle.h -// RSParser -// -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class RSParsedEnclosure; -@class RSParsedAuthor; - -@interface RSParsedArticle : NSObject - -- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL; - -@property (nonatomic, readonly, nonnull) NSString *feedURL; -@property (nonatomic, nonnull) NSString *articleID; //guid, if present, or calculated from other attributes. Should be unique to the feed, but not necessarily unique across different feeds. (Not suitable for a database ID.) 
- -@property (nonatomic, nullable) NSString *guid; -@property (nonatomic, nullable) NSString *title; -@property (nonatomic, nullable) NSString *body; -@property (nonatomic, nullable) NSString *link; -@property (nonatomic, nullable) NSString *permalink; -@property (nonatomic, nullable) NSSet *authors; -@property (nonatomic, nullable) NSSet *enclosures; -@property (nonatomic, nullable) NSDate *datePublished; -@property (nonatomic, nullable) NSDate *dateModified; -@property (nonatomic, nonnull) NSDate *dateParsed; -@property (nonatomic, nullable) NSString *language; - -- (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure; -- (void)addAuthor:(RSParsedAuthor *_Nonnull)author; - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m deleted file mode 100755 index b94930a79..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m +++ /dev/null @@ -1,134 +0,0 @@ -// -// RSParsedArticle.m -// RSParser -// -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. 
-// - - -#import "RSParsedArticle.h" -#import "RSParserInternal.h" -#import "NSString+RSParser.h" -#import "RSParsedAuthor.h" -#import "RSParsedEnclosure.h" - - - -@implementation RSParsedArticle - - -#pragma mark - Init - -- (instancetype)initWithFeedURL:(NSString *)feedURL { - - NSParameterAssert(feedURL != nil); - - self = [super init]; - if (!self) { - return nil; - } - - _feedURL = feedURL; - _dateParsed = [NSDate date]; - - return self; -} - - -#pragma mark - Enclosures - -- (void)addEnclosure:(RSParsedEnclosure *)enclosure { - - if (self.enclosures) { - self.enclosures = [self.enclosures setByAddingObject:enclosure]; - } - else { - self.enclosures = [NSSet setWithObject:enclosure]; - } -} - -#pragma mark - Authors - -- (void)addAuthor:(RSParsedAuthor *)author { - - if (self.authors) { - self.authors = [self.authors setByAddingObject:author]; - } - else { - self.authors = [NSSet setWithObject:author]; - } -} - -#pragma mark - articleID - -- (NSString *)articleID { - - if (self.guid) { - return self.guid; - } - - if (!_articleID) { - _articleID = [self calculatedArticleID]; - } - - return _articleID; -} - - -- (NSString *)calculatedArticleID { - - /*Concatenate a combination of properties when no guid. Then hash the result. - In general, feeds should have guids. When they don't, re-runs are very likely, - because there's no other 100% reliable way to determine identity. - This is intended to create an ID unique inside a feed, but not globally unique. - Not suitable for a database ID, in other words.*/ - - NSMutableString *s = [NSMutableString stringWithString:@""]; - - NSString *datePublishedTimeStampString = nil; - if (self.datePublished) { - datePublishedTimeStampString = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970]; - } - - // Ideally we have a permalink and a pubDate. Either one would probably be a good guid, but together they should be rock-solid. (In theory. Feeds are buggy, though.) 
- if (!RSParserStringIsEmpty(self.permalink) && datePublishedTimeStampString) { - [s appendString:self.permalink]; - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.link) && datePublishedTimeStampString) { - [s appendString:self.link]; - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.title) && datePublishedTimeStampString) { - [s appendString:self.title]; - [s appendString:datePublishedTimeStampString]; - } - - else if (datePublishedTimeStampString) { - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.permalink)) { - [s appendString:self.permalink]; - } - - else if (!RSParserStringIsEmpty(self.link)) { - [s appendString:self.link]; - } - - else if (!RSParserStringIsEmpty(self.title)) { - [s appendString:self.title]; - } - - else if (!RSParserStringIsEmpty(self.body)) { - [s appendString:self.body]; - } - - return [s rsparser_md5Hash]; -} - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h deleted file mode 100755 index 80be90fed..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h +++ /dev/null @@ -1,23 +0,0 @@ -// -// RSParsedFeed.h -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -@class RSParsedArticle; - -@interface RSParsedFeed : NSObject - -- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link language:(NSString * _Nullable)language articles:(NSArray * _Nonnull)articles; - -@property (nonatomic, readonly, nonnull) NSString *urlString; -@property (nonatomic, readonly, nullable) NSString *title; -@property (nonatomic, readonly, nullable) NSString *link; -@property (nonatomic, readonly, nullable) NSString *language; -@property (nonatomic, readonly, nonnull) NSSet *articles; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m deleted file mode 100755 index ef0c42e76..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m +++ /dev/null @@ -1,32 +0,0 @@ -// -// RSParsedFeed.m -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -#import "RSParsedFeed.h" - - - -@implementation RSParsedFeed - -- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link language:(NSString *)language articles:(NSSet *)articles { - - self = [super init]; - if (!self) { - return nil; - } - - _urlString = urlString; - _title = title; - _link = link; - _language = language; - _articles = articles; - - return self; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h b/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h deleted file mode 100755 index 76209e076..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSParserInternal.h -// RSParser -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
-// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -BOOL RSParserObjectIsEmpty(id _Nullable obj); -BOOL RSParserStringIsEmpty(NSString * _Nullable s); - - -@interface NSDictionary (RSParserInternal) - -- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m b/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m deleted file mode 100755 index 4ba6f8a97..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m +++ /dev/null @@ -1,61 +0,0 @@ -// -// RSParserInternal.m -// RSParser -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSParserInternal.h" -#import - - -static BOOL RSParserIsNil(id obj) { - - return obj == nil || obj == [NSNull null]; -} - -BOOL RSParserObjectIsEmpty(id obj) { - - if (RSParserIsNil(obj)) { - return YES; - } - - if ([obj respondsToSelector:@selector(count)]) { - return [obj count] < 1; - } - - if ([obj respondsToSelector:@selector(length)]) { - return [obj length] < 1; - } - - return NO; /*Shouldn't get here very often.*/ -} - -BOOL RSParserStringIsEmpty(NSString *s) { - - return RSParserIsNil(s) || s.length < 1; -} - - -@implementation NSDictionary (RSParserInternal) - -- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key { - - id obj = self[key]; - if (obj) { - return obj; - } - - for (NSString *oneKey in self.allKeys) { - - if ([oneKey isKindOfClass:[NSString class]] && [key caseInsensitiveCompare:oneKey] == NSOrderedSame) { - return self[oneKey]; - } - } - - return nil; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h deleted file mode 100755 index 26e97d0a2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// RSRSSParser.h -// RSParser -// -// Created by Brent Simmons on 
1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class ParserData; -@class RSParsedFeed; - -@interface RSRSSParser : NSObject - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData; - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m deleted file mode 100755 index 455320ab2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m +++ /dev/null @@ -1,523 +0,0 @@ -// -// RSRSSParser.m -// RSParser -// -// Created by Brent Simmons on 1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -#import "RSRSSParser.h" -#import "RSSAXParser.h" -#import "RSParsedFeed.h" -#import "RSParsedArticle.h" -#import "RSParserInternal.h" -#import "NSString+RSParser.h" -#import "RSDateParser.h" -#import "ParserData.h" -#import "RSParsedEnclosure.h" -#import "RSParsedAuthor.h" - - - -#import - - -@interface RSRSSParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) NSDictionary *currentAttributes; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic) BOOL parsingAuthor; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic) BOOL parsingChannelImage; -@property (nonatomic, readonly) NSDate *currentDate; -@property (nonatomic) BOOL endRSSFound; -@property (nonatomic) NSString *link; -@property (nonatomic) NSString *title; -@property (nonatomic) NSDate *dateParsed; -@property (nonatomic) BOOL isRDF; -@property (nonatomic) NSString *language; - -@end - - -@implementation RSRSSParser - -#pragma mark - Class Methods - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData { - - RSRSSParser *parser = [[[self class] alloc] initWithParserData:parserData]; - return [parser parseFeed]; -} - -#pragma mark - Init 
- -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - _feedData = parserData.data; - _urlString = parserData.url; - _parser = [[RSSAXParser alloc] initWithDelegate:self]; - _articles = [NSMutableArray new]; - - return self; -} - -#pragma mark - API - -- (RSParsedFeed *)parseFeed { - - [self parse]; - - RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.title link:self.link language:self.language articles:self.articles]; - - return parsedFeed; -} - - -#pragma mark - Constants - -static NSString *kIsPermaLinkKey = @"isPermaLink"; -static NSString *kURLKey = @"url"; -static NSString *kLengthKey = @"length"; -static NSString *kTypeKey = @"type"; -static NSString *kFalseValue = @"false"; -static NSString *kTrueValue = @"true"; -static NSString *kContentEncodedKey = @"content:encoded"; -static NSString *kDCDateKey = @"dc:date"; -static NSString *kDCCreatorKey = @"dc:creator"; -static NSString *kRDFAboutKey = @"rdf:about"; - -static const char *kItem = "item"; -static const NSInteger kItemLength = 5; - -static const char *kImage = "image"; -static const NSInteger kImageLength = 6; - -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; - -static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kDC = "dc"; -static const NSInteger kDCLength = 3; - -static const char *kCreator = "creator"; -static const NSInteger kCreatorLength = 8; - -static const char *kDate = "date"; -static const NSInteger kDateLength = 5; - -static const char *kContent = "content"; -static const NSInteger kContentLength = 8; - -static const char *kEncoded = "encoded"; -static const NSInteger kEncodedLength = 8; - -static const char *kGuid = "guid"; -static const NSInteger kGuidLength = 5; - -static const char *kPubDate = "pubDate"; -static const NSInteger kPubDateLength = 8; - -static const char *kAuthor = "author"; -static 
const NSInteger kAuthorLength = 7; - -static const char *kDescription = "description"; -static const NSInteger kDescriptionLength = 12; - -static const char *kRSS = "rss"; -static const NSInteger kRSSLength = 4; - -static const char *kURL = "url"; -static const NSInteger kURLLength = 4; - -static const char *kLength = "length"; -static const NSInteger kLengthLength = 7; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kIsPermaLink = "isPermaLink"; -static const NSInteger kIsPermaLinkLength = 12; - -static const char *kRDF = "rdf"; -static const NSInteger kRDFlength = 4; - -static const char *kAbout = "about"; -static const NSInteger kAboutLength = 6; - -static const char *kFalse = "false"; -static const NSInteger kFalseLength = 6; - -static const char *kTrue = "true"; -static const NSInteger kTrueLength = 5; - -static const char *kUppercaseRDF = "RDF"; -static const NSInteger kUppercaseRDFLength = 4; - -static const char *kEnclosure = "enclosure"; -static const NSInteger kEnclosureLength = 10; - -static const char *kLanguage = "language"; -static const NSInteger kLanguageLength = 9; - -#pragma mark - Parsing - -- (void)parse { - - self.dateParsed = [NSDate date]; - - @autoreleasepool { - [self.parser parseData:self.feedData]; - [self.parser finishParsing]; - } -} - - -- (void)addArticle { - - RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; - article.dateParsed = self.dateParsed; - - [self.articles addObject:article]; -} - - -- (RSParsedArticle *)currentArticle { - - return self.articles.lastObject; -} - - -- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - if (!self.link) { - self.link = [self currentString]; - } - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.title = [self currentString]; - } - - else if 
(RSSAXEqualTags(localName, kLanguage, kLanguageLength)) { - self.language = [self currentString]; - } -} - -- (void)addAuthorWithString:(NSString *)authorString { - - if (RSParserStringIsEmpty(authorString)) { - return; - } - - RSParsedAuthor *author = [RSParsedAuthor authorWithSingleString:[self currentString]]; - [self.currentArticle addAuthor:author]; -} - -- (void)addDCElement:(const xmlChar *)localName { - - if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) { - [self addAuthorWithString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kDate, kDateLength)) { - self.currentArticle.datePublished = self.currentDate; - } -} - - -- (void)addGuid { - - NSString *guid = [self currentString]; - self.currentArticle.guid = guid; - - NSString *isPermaLinkValue = [self.currentAttributes rsparser_objectForCaseInsensitiveKey:@"ispermalink"]; - if (!isPermaLinkValue || ![isPermaLinkValue isEqualToString:@"false"]) { - if ([self stringIsProbablyAURLOrRelativePath:guid]) { - self.currentArticle.permalink = [self urlString:guid]; - } - } -} - -- (void)addEnclosure { - - NSDictionary *attributes = self.currentAttributes; - NSString *url = attributes[kURLKey]; - if (!url || url.length < 1) { - return; - } - - RSParsedEnclosure *enclosure = [[RSParsedEnclosure alloc] init]; - enclosure.url = url; - enclosure.length = [attributes[kLengthKey] integerValue]; - enclosure.mimeType = attributes[kTypeKey]; - - [self.currentArticle addEnclosure:enclosure]; -} - -- (BOOL)stringIsProbablyAURLOrRelativePath:(NSString *)s { - - /*The RSS guid is defined as a permalink, except when it appears like this: - some—identifier - However, people often seem to think it’s *not* a permalink by default, even - though it is. So we try to detect the situation where the value is not a URL string, - and not even a relative path. 
This may need to evolve over time as we find - feeds broken in different ways.*/ - - if (![s rsparser_contains:@"/"]) { - // This seems to be just about the best possible check. - // Bad guids are often just integers, for instance. - return NO; - } - - if ([s.lowercaseString hasPrefix:@"tag:"]) { // A common non-URL guid form - return NO; - } - return YES; -} - -- (NSString *)urlString:(NSString *)s { - - /*Resolve against home page URL (if available) or feed URL.*/ - - if ([[s lowercaseString] hasPrefix:@"http"]) { - return s; - } - - if (!self.link) { - //TODO: get feed URL and use that to resolve URL.*/ - return s; - } - - NSURL *baseURL = [NSURL URLWithString:self.link]; - if (!baseURL) { - return s; - } - - NSURL *resolvedURL = [NSURL URLWithString:s relativeToURL:baseURL]; - if (resolvedURL.absoluteString) { - return resolvedURL.absoluteString; - } - - return s; -} - - -- (NSString *)currentString { - - return self.parser.currentStringWithTrimmedWhitespace; -} - - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (RSSAXEqualTags(prefix, kDC, kDCLength)) { - - [self addDCElement:localName]; - return; - } - - if (RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength)) { - NSString *s = [self currentString]; - if (!RSParserStringIsEmpty(s)) { - self.currentArticle.body = s; - } - return; - } - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kGuid, kGuidLength)) { - [self addGuid]; - } - else if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) { - self.currentArticle.datePublished = self.currentDate; - } - else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - [self addAuthorWithString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - self.currentArticle.link = [self urlString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) { - - if 
(!self.currentArticle.body) { - self.currentArticle.body = [self currentString]; - } - } - else if (!self.parsingAuthor && RSSAXEqualTags(localName, kTitle, kTitleLength)) { - NSString *articleTitle = [self currentString]; - if (articleTitle != nil) { - self.currentArticle.title = articleTitle; - } - } - else if (RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) { - [self addEnclosure]; - } -} - - -- (NSDate *)currentDate { - - return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); -} - - -#pragma mark - RSSAXParserDelegate - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (self.endRSSFound) { - return; - } - - if (RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) { - self.isRDF = YES; - return; - } - - NSDictionary *xmlAttributes = nil; - if ((self.isRDF && RSSAXEqualTags(localName, kItem, kItemLength)) || RSSAXEqualTags(localName, kGuid, kGuidLength) || RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) { - xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - } - if (self.currentAttributes != xmlAttributes) { - self.currentAttributes = xmlAttributes; - } - - if (!prefix && RSSAXEqualTags(localName, kItem, kItemLength)) { - - [self addArticle]; - self.parsingArticle = YES; - - if (self.isRDF && xmlAttributes && xmlAttributes[kRDFAboutKey]) { /*RSS 1.0 guid*/ - self.currentArticle.guid = xmlAttributes[kRDFAboutKey]; - self.currentArticle.permalink = self.currentArticle.guid; - } - } - - else if (!prefix && RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = YES; - } - else if (!prefix && 
RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - if (self.parsingArticle) { - self.parsingAuthor = true; - } - } - - if (!self.parsingChannelImage) { - [self.parser beginStoringCharacters]; - } -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (self.endRSSFound) { - return; - } - - if (self.isRDF && RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) { - self.endRSSFound = YES; - } - - else if (RSSAXEqualTags(localName, kRSS, kRSSLength)) { - self.endRSSFound = YES; - } - - else if (RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = NO; - } - - else if (RSSAXEqualTags(localName, kItem, kItemLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle) { - [self addArticleElement:localName prefix:prefix]; - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = NO; - } - } - - else if (!self.parsingChannelImage) { - [self addFeedElement:localName prefix:prefix]; - } -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) { - - if (RSSAXEqualTags(name, kAbout, kAboutLength)) { - return kRDFAboutKey; - } - - return nil; - } - - if (prefix) { - return nil; - } - - if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) { - return kIsPermaLinkKey; - } - - if (RSSAXEqualTags(name, kURL, kURLLength)) { - return kURLKey; - } - - if (RSSAXEqualTags(name, kLength, kLengthLength)) { - return kLengthKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - - return nil; -} - - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length 
{ - - static const NSUInteger falseLength = kFalseLength - 1; - static const NSUInteger trueLength = kTrueLength - 1; - - if (length == falseLength && equalBytes(bytes, kFalse, falseLength)) { - return kFalseValue; - } - - if (length == trueLength && equalBytes(bytes, kTrue, trueLength)) { - return kTrueValue; - } - - return nil; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h deleted file mode 100755 index f67d60cf6..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h +++ /dev/null @@ -1,55 +0,0 @@ -// -// RSSAXHTMLParser.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -@class RSSAXHTMLParser; - -@protocol RSSAXHTMLParserDelegate - -@optional - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char *_Nullable*_Nullable)attributes; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(nullable const unsigned char *)localName; - -// Length is guaranteed to be greater than 0. -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(nullable const unsigned char *)characters length:(NSUInteger)length; - -- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might). - -@end - - -@interface RSSAXHTMLParser : NSObject - - -- (instancetype)initWithDelegate:(id)delegate; - -- (void)parseData:(NSData *)data; -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; -- (void)finishParsing; -- (void)cancel; - -@property (nullable, nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded. -@property (nullable, nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters. 
-@property (nullable, nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - -- (void)beginStoringCharacters; // Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement. - -// Delegate can call from within XMLStartElement. - -- (nullable NSDictionary *)attributesDictionary:(const unsigned char *_Nullable*_Nullable)attributes; - - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m deleted file mode 100755 index 5df2d84fd..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m +++ /dev/null @@ -1,321 +0,0 @@ -// -// RSSAXHTMLParser.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" - -#import -#import -#import - - - -@interface RSSAXHTMLParser () - -@property (nonatomic) id delegate; -@property (nonatomic, assign) htmlParserCtxtPtr context; -@property (nonatomic, assign) BOOL storingCharacters; -@property (nonatomic) NSMutableData *characters; -@property (nonatomic) BOOL delegateRespondsToStartElementMethod; -@property (nonatomic) BOOL delegateRespondsToEndElementMethod; -@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; -@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; - -@end - - -@implementation RSSAXHTMLParser - - -+ (void)initialize { - - RSSAXInitLibXMLParser(); -} - - -#pragma mark - Init - -- (instancetype)initWithDelegate:(id)delegate { - - self = [super init]; - if (self == nil) - return nil; - - _delegate = delegate; - - if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]) { - _delegateRespondsToStartElementMethod = YES; - } - if ([_delegate 
respondsToSelector:@selector(saxParser:XMLEndElement:)]) { - _delegateRespondsToEndElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { - _delegateRespondsToCharactersFoundMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { - _delegateRespondsToEndOfDocumentMethod = YES; - } - - return self; -} - - -#pragma mark - Dealloc - -- (void)dealloc { - - if (_context != nil) { - htmlFreeParserCtxt(_context); - _context = nil; - } - _delegate = nil; -} - - -#pragma mark - API - -static xmlSAXHandler saxHandlerStruct; - -- (void)parseData:(NSData *)data { - - [self parseBytes:data.bytes numberOfBytes:data.length]; -} - - -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { - - if (self.context == nil) { - - xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes); - self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding); - htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT); - } - - @autoreleasepool { - htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); - } -} - - -- (void)finishParsing { - - NSAssert(self.context != nil, nil); - if (self.context == nil) - return; - - @autoreleasepool { - htmlParseChunk(self.context, nil, 0, 1); - htmlFreeParserCtxt(self.context); - self.context = nil; - self.characters = nil; - } -} - - -- (void)cancel { - - @autoreleasepool { - xmlStopParser(self.context); - } -} - - - -- (void)beginStoringCharacters { - self.storingCharacters = YES; - self.characters = [NSMutableData new]; -} - - -- (void)endStoringCharacters { - self.storingCharacters = NO; - self.characters = nil; -} - - -- (NSData *)currentCharacters { - - if (!self.storingCharacters) { - return nil; - } - - return self.characters; -} - - -- (NSString *)currentString { - - NSData *d = 
self.currentCharacters; - if (RSParserObjectIsEmpty(d)) { - return nil; - } - - return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; -} - - -- (NSString *)currentStringWithTrimmedWhitespace { - - return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; -} - - -#pragma mark - Attributes Dictionary - -- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes { - - if (!attributes) { - return nil; - } - - NSMutableDictionary *d = [NSMutableDictionary new]; - - NSInteger ix = 0; - NSString *currentKey = nil; - while (true) { - - const xmlChar *oneAttribute = attributes[ix]; - ix++; - - if (!currentKey && !oneAttribute) { - break; - } - - if (!currentKey) { - currentKey = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - else { - NSString *value = nil; - if (oneAttribute) { - value = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - - d[currentKey] = value ? value : @""; - currentKey = nil; - } - } - - return [d copy]; -} - - -#pragma mark - Callbacks - -- (void)xmlEndDocument { - - @autoreleasepool { - if (self.delegateRespondsToEndOfDocumentMethod) { - [self.delegate saxParserDidReachEndOfDocument:self]; - } - - [self endStoringCharacters]; - } -} - - -- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length { - - if (length < 1) { - return; - } - - @autoreleasepool { - if (self.storingCharacters) { - [self.characters appendBytes:(const void *)ch length:length]; - } - - if (self.delegateRespondsToCharactersFoundMethod) { - [self.delegate saxParser:self XMLCharactersFound:ch length:length]; - } - } -} - - -- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - @autoreleasepool { - if (self.delegateRespondsToStartElementMethod) { - - [self.delegate saxParser:self XMLStartElement:localName attributes:attributes]; - } - } -} - - -- (void)xmlEndElement:(const xmlChar *)localName { - - @autoreleasepool { 
- if (self.delegateRespondsToEndElementMethod) { - [self.delegate saxParser:self XMLEndElement:localName]; - } - - [self endStoringCharacters]; - } -} - - -@end - - -static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) { - - [(__bridge RSSAXHTMLParser *)context xmlStartElement:localname attributes:attributes]; -} - - -static void endElementSAX(void *context, const xmlChar *localname) { - [(__bridge RSSAXHTMLParser *)context xmlEndElement:localname]; -} - - -static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { - [(__bridge RSSAXHTMLParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; -} - - -static void endDocumentSAX(void *context) { - [(__bridge RSSAXHTMLParser *)context xmlEndDocument]; -} - - -static htmlSAXHandler saxHandlerStruct = { - nil, /* internalSubset */ - nil, /* isStandalone */ - nil, /* hasInternalSubset */ - nil, /* hasExternalSubset */ - nil, /* resolveEntity */ - nil, /* getEntity */ - nil, /* entityDecl */ - nil, /* notationDecl */ - nil, /* attributeDecl */ - nil, /* elementDecl */ - nil, /* unparsedEntityDecl */ - nil, /* setDocumentLocator */ - nil, /* startDocument */ - endDocumentSAX, /* endDocument */ - startElementSAX, /* startElement*/ - endElementSAX, /* endElement */ - nil, /* reference */ - charactersFoundSAX, /* characters */ - nil, /* ignorableWhitespace */ - nil, /* processingInstruction */ - nil, /* comment */ - nil, /* warning */ - nil, /* error */ - nil, /* fatalError //: unused error() get all the errors */ - nil, /* getParameterEntity */ - nil, /* cdataBlock */ - nil, /* externalSubset */ - XML_SAX2_MAGIC, - nil, - nil, /* startElementNs */ - nil, /* endElementNs */ - nil /* serror */ -}; - diff --git a/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h b/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h deleted file mode 100644 index c9bd0008f..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h +++ /dev/null @@ 
-1,56 +0,0 @@ -// -// RSParser.h -// RSParser -// -// Created by Brent Simmons on 6/20/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -#import "../ParserData.h" -#import "../RSDateParser.h" - -// OPML - -#import "../RSOPMLParser.h" -#import "../RSOPMLDocument.h" -#import "../RSOPMLItem.h" -#import "../RSOPMLAttributes.h" -#import "../RSOPMLFeedSpecifier.h" -#import "../RSOPMLError.h" - -// For writing your own XML parser. - -#import "../RSSAXParser.h" - -// You should use FeedParser (Swift) instead of these two specific parsers -// and the objects they create. -// But they’re available if you want them. - -#import "../RSRSSParser.h" -#import "../RSAtomParser.h" -#import "../RSParsedFeed.h" -#import "../RSParsedArticle.h" -#import "../RSParsedEnclosure.h" -#import "../RSParsedAuthor.h" - -// HTML - -#import "../RSHTMLMetadataParser.h" -#import "../RSHTMLMetadata.h" -#import "../RSHTMLLinkParser.h" -#import "../RSSAXHTMLParser.h" // For writing your own HTML parser. 
-#import "../RSHTMLTag.h" - -// Utilities - -#import "../NSData+RSParser.h" -#import "../NSString+RSParser.h" - - - - - - diff --git a/NetNewsWire.xcodeproj/project.pbxproj b/NetNewsWire.xcodeproj/project.pbxproj index 18d63ad1d..6b0474782 100644 --- a/NetNewsWire.xcodeproj/project.pbxproj +++ b/NetNewsWire.xcodeproj/project.pbxproj @@ -385,12 +385,8 @@ 8454C3F8263F3AD400E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; }; 8456116B2BBD145200507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; }; 8456116C2BBD145200507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; - 8456116E2BBD145200507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; }; - 8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 845611712BBD145D00507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; }; 845611722BBD145D00507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; - 845611742BBD145D00507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; }; - 845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 845A29221FC9251E007B49E3 /* SidebarCellLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29211FC9251E007B49E3 /* SidebarCellLayout.swift */; }; 845A29241FC9255E007B49E3 /* 
SidebarCellAppearance.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29231FC9255E007B49E3 /* SidebarCellAppearance.swift */; }; 845EE7B11FC2366500854A1F /* StarredFeedDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845EE7B01FC2366500854A1F /* StarredFeedDelegate.swift */; }; @@ -494,7 +490,6 @@ 84DC5FFE2BCE37A300F04682 /* AppDelegate+Shared.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84DC5FFD2BCE37A300F04682 /* AppDelegate+Shared.swift */; }; 84DC60002BCE37A300F04682 /* AppDelegate+Shared.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84DC5FFD2BCE37A300F04682 /* AppDelegate+Shared.swift */; }; 84DC60022BCE40B200F04682 /* Images in Frameworks */ = {isa = PBXBuildFile; productRef = 84DC60012BCE40B200F04682 /* Images */; }; - 84DC60042BCE40D000F04682 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 84DC60032BCE40D000F04682 /* ParserObjC */; }; 84DCA5122BABB75600792720 /* FoundationExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA5112BABB75600792720 /* FoundationExtras */; }; 84DCA5142BABB76100792720 /* AppKitExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA5132BABB76100792720 /* AppKitExtras */; }; 84DCA51E2BABB79900792720 /* FoundationExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA51D2BABB79900792720 /* FoundationExtras */; }; @@ -686,7 +681,6 @@ 8426DBC02BFDAEF200E98109 /* Web in Embed Frameworks */, 513F32782593EE6F0003048F /* Secrets in Embed Frameworks */, 513F327B2593EE6F0003048F /* SyncDatabase in Embed Frameworks */, - 845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */, 513F32722593EE6F0003048F /* Articles in Embed Frameworks */, 513F32812593EF180003048F /* Account in Embed Frameworks */, 8426DBB92BFDAD9200E98109 /* Core in Embed Frameworks */, @@ -741,7 +735,6 @@ 8426DBC32BFDAEFC00E98109 /* Web in Embed Frameworks */, 513277442590FBB60064F1E7 /* Account in Embed Frameworks */, 5132775F2590FC640064F1E7 /* Articles in Embed Frameworks */, - 
8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */, 513277662590FC780064F1E7 /* Secrets in Embed Frameworks */, 513277652590FC640064F1E7 /* SyncDatabase in Embed Frameworks */, 8426DBB82BFDAD8500E98109 /* Core in Embed Frameworks */, @@ -1097,7 +1090,6 @@ 84A059EE2C3A4A570041209B /* FMDB */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = FMDB; sourceTree = ""; }; 84A059EF2C3A4A5B0041209B /* Web */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Web; sourceTree = ""; }; 84A059F02C3A4A5F0041209B /* Parser */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Parser; sourceTree = ""; }; - 84A059F12C3A4A620041209B /* ParserObjC */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = ParserObjC; sourceTree = ""; }; 84A059F22C3A4A670041209B /* Core */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Core; sourceTree = ""; }; 84A059F32C3A4A6C0041209B /* Tree */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Tree; sourceTree = ""; }; 84A059F42C3A4AA30041209B /* UIKitExtras */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = UIKitExtras; sourceTree = ""; }; @@ -1244,7 +1236,6 @@ 841CECDE2BAD06D10001EE72 /* Tree in Frameworks */, 51BC2F3824D3439A00E90810 /* Account in Frameworks */, 8426DBC82BFDAF4300E98109 /* Web in Frameworks */, - 84DC60042BCE40D000F04682 /* ParserObjC in Frameworks */, 84D9582C2BABE53B0053E7B2 /* FoundationExtras in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1291,7 +1282,6 @@ 84C1A8582BBBA5BD006E3E96 /* Web in Frameworks */, 516B695F24D2F33B00B5702F /* Account in Frameworks */, 84A699152BC34F3D00605AB8 /* ArticleExtractor in Frameworks */, - 845611742BBD145D00507B73 /* ParserObjC in Frameworks */, 845611712BBD145D00507B73 /* Parser in Frameworks */, 513F32712593EE6F0003048F /* Articles in Frameworks */, 513F32772593EE6F0003048F /* Secrets in Frameworks */, @@ -1329,7 +1319,6 @@ 841CECD82BAD04B20001EE72 /* Tree in Frameworks */, 
8426DBC22BFDAEFC00E98109 /* Web in Frameworks */, 8456116B2BBD145200507B73 /* Parser in Frameworks */, - 8456116E2BBD145200507B73 /* ParserObjC in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2052,7 +2041,6 @@ 84A059EE2C3A4A570041209B /* FMDB */, 84A059EF2C3A4A5B0041209B /* Web */, 84A059F02C3A4A5F0041209B /* Parser */, - 84A059F12C3A4A620041209B /* ParserObjC */, 84A059F22C3A4A670041209B /* Core */, 84A059F32C3A4A6C0041209B /* Tree */, 84A059F42C3A4AA30041209B /* UIKitExtras */, @@ -2493,7 +2481,6 @@ 84D9582B2BABE53B0053E7B2 /* FoundationExtras */, 841CECDD2BAD06D10001EE72 /* Tree */, 84DC60012BCE40B200F04682 /* Images */, - 84DC60032BCE40D000F04682 /* ParserObjC */, 8426DBC72BFDAF4300E98109 /* Web */, ); productName = "NetNewsWire iOS Share Extension"; @@ -2608,7 +2595,6 @@ 841CECDB2BAD04BF0001EE72 /* Tree */, 84C1A8572BBBA5BD006E3E96 /* Web */, 845611702BBD145D00507B73 /* Parser */, - 845611732BBD145D00507B73 /* ParserObjC */, 8410C4A42BC1E28200D4F799 /* ReaderAPI */, 84A699142BC34F3D00605AB8 /* ArticleExtractor */, 84DC5FFB2BCE31DB00F04682 /* Images */, @@ -2657,7 +2643,6 @@ 8438C2DA2BABE0B00040C9EE /* CoreResources */, 841CECD72BAD04B20001EE72 /* Tree */, 8456116A2BBD145200507B73 /* Parser */, - 8456116D2BBD145200507B73 /* ParserObjC */, 84A699162BC34F4400605AB8 /* ArticleExtractor */, 84DC5FF92BCE31D200F04682 /* Images */, 8426DBC12BFDAEFC00E98109 /* Web */, @@ -4200,18 +4185,10 @@ isa = XCSwiftPackageProductDependency; productName = Parser; }; - 8456116D2BBD145200507B73 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 845611702BBD145D00507B73 /* Parser */ = { isa = XCSwiftPackageProductDependency; productName = Parser; }; - 845611732BBD145D00507B73 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 8479ABE22B9E906E00F84C4D /* Database */ = { isa = XCSwiftPackageProductDependency; productName = Database; @@ -4260,10 +4237,6 @@ isa = 
XCSwiftPackageProductDependency; productName = Images; }; - 84DC60032BCE40D000F04682 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 84DCA5112BABB75600792720 /* FoundationExtras */ = { isa = XCSwiftPackageProductDependency; productName = FoundationExtras; diff --git a/Shared/AppDelegate+Shared.swift b/Shared/AppDelegate+Shared.swift index 626ff7f2e..ebfe1a8d4 100644 --- a/Shared/AppDelegate+Shared.swift +++ b/Shared/AppDelegate+Shared.swift @@ -8,7 +8,6 @@ import Foundation import Images -import ParserObjC import Account extension AppDelegate: FaviconDownloaderDelegate, FeedIconDownloaderDelegate { diff --git a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift index da78fee44..87268b3cd 100644 --- a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift +++ b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift @@ -9,7 +9,6 @@ import Foundation import Web import Parser -import ParserObjC extension RSHTMLMetadata: @unchecked Sendable {} From cdd95843e8a2ccce6ff531420329298f328f846c Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 22:13:26 -0700 Subject: [PATCH 83/88] =?UTF-8?q?Remove=20ParserData=20from=20FeedParser?= =?UTF-8?q?=20API,=20so=20clients=20won=E2=80=99t=20need=20to=20import=20S?= =?UTF-8?q?AX.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Modules/Parser/Package.swift | 1 - .../Sources/FeedParser/Feeds/FeedParser.swift | 17 +++++++-------- .../Feeds/JSON/JSONFeedParser.swift | 8 +++---- .../Feeds/JSON/RSSInJSONParser.swift | 8 +++---- .../FeedParser/Feeds/XML/AtomParser.swift | 21 +++++++------------ .../FeedParser/Feeds/XML/RSSParser.swift | 21 +++++++------------ .../ParserTests/RSSInJSONParserTests.swift | 9 ++++++++ 7 files changed, 41 insertions(+), 44 deletions(-) diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index d475aca85..9e749f804 100644 --- 
a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -88,4 +88,3 @@ let package = Package( resources: [.copy("Resources")]), ] ) - diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift index 6e467f7b4..cc124c43b 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift @@ -7,16 +7,15 @@ // import Foundation -import SAX // FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. // You don’t need to know the type of feed. public struct FeedParser { - public static func canParse(_ parserData: ParserData) -> Bool { + public static func canParse(_ data: Data) -> Bool { - let type = FeedType.feedType(parserData.data) + let type = FeedType.feedType(data) switch type { case .jsonFeed, .rssInJSON, .rss, .atom: @@ -26,24 +25,24 @@ public struct FeedParser { } } - public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + public static func parse(urlString: String, data: Data) throws -> ParsedFeed? 
{ - let type = FeedType.feedType(parserData.data) + let type = FeedType.feedType(data) switch type { case .jsonFeed: - return try JSONFeedParser.parse(parserData) + return try JSONFeedParser.parse(urlString: urlString, data: data) case .rssInJSON: - return try RSSInJSONParser.parse(parserData) + return try RSSInJSONParser.parse(urlString: urlString, data: data) case .rss: - let feed = RSSParser.parsedFeed(with: parserData) + let feed = RSSParser.parsedFeed(urlString: urlString, data: data) return RSSFeedTransformer.parsedFeed(with: feed, feedType: .rss) case .atom: - let feed = AtomParser.parsedFeed(with: parserData) + let feed = AtomParser.parsedFeed(urlString: urlString, data: data) return RSSFeedTransformer.parsedFeed(with: feed, feedType: .atom) case .unknown, .notAFeed: diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift index 36cc1f23d..488a1db4c 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift @@ -51,9 +51,9 @@ public struct JSONFeedParser { static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct. - public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + public static func parse(urlString: String, data: Data) throws -> ParsedFeed? { - guard let d = JSONUtilities.dictionary(with: parserData.data) else { + guard let d = JSONUtilities.dictionary(with: data) else { throw FeedParserError(.invalidJSON) } @@ -69,7 +69,7 @@ public struct JSONFeedParser { let authors = parseAuthors(d) let homePageURL = d[Key.homePageURL] as? String - let feedURL = d[Key.feedURL] as? String ?? parserData.url + let feedURL = d[Key.feedURL] as? String ?? urlString let feedDescription = d[Key.feedDescription] as? String let nextURL = d[Key.nextURL] as? String let iconURL = d[Key.icon] as? 
String @@ -78,7 +78,7 @@ public struct JSONFeedParser { let hubs = parseHubs(d) let language = d[Key.language] as? String - let items = parseItems(itemsArray, parserData.url) + let items = parseItems(itemsArray, urlString) return ParsedFeed(type: .jsonFeed, title: title, homePageURL: homePageURL, feedURL: feedURL, language: language, feedDescription: feedDescription, nextURL: nextURL, iconURL: iconURL, faviconURL: faviconURL, authors: authors, expired: expired, hubs: hubs, items: items) } diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift index 2dcd92313..6e7feca1f 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift @@ -15,10 +15,10 @@ import DateParser public struct RSSInJSONParser { - public static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + public static func parse(urlString: String, data: Data) throws -> ParsedFeed? { do { - guard let parsedObject = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else { + guard let parsedObject = try JSONSerialization.jsonObject(with: data) as? JSONDictionary else { throw FeedParserError(.invalidJSON) } guard let rssObject = parsedObject["rss"] as? JSONDictionary else { @@ -46,11 +46,11 @@ public struct RSSInJSONParser { let title = channelObject["title"] as? String let homePageURL = channelObject["link"] as? String - let feedURL = parserData.url + let feedURL = urlString let feedDescription = channelObject["description"] as? String let feedLanguage = channelObject["language"] as? 
String - let items = parseItems(itemsObject!, parserData.url) + let items = parseItems(itemsObject!, urlString) return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items) diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift index 4aaa77925..a77015c3f 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift @@ -13,14 +13,8 @@ import DateParser final class AtomParser { - private var parserData: ParserData - private var feedURL: String { - parserData.url - } - private var data: Data { - parserData.data - } - + private var feedURL: String + private let data: Data private let feed: RSSFeed private var articles = [RSSArticle]() @@ -43,16 +37,17 @@ final class AtomParser { private var parsingSource = false private var endFeedFound = false - static func parsedFeed(with parserData: ParserData) -> RSSFeed { + static func parsedFeed(urlString: String, data: Data) -> RSSFeed { - let parser = AtomParser(parserData) + let parser = AtomParser(urlString: urlString, data: data) parser.parse() return parser.feed } - init(_ parserData: ParserData) { - self.parserData = parserData - self.feed = RSSFeed(urlString: parserData.url) + init(urlString: String, data: Data) { + self.feedURL = urlString + self.data = data + self.feed = RSSFeed(urlString: urlString) } } diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift index 4b06a6123..341b0f702 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift @@ -13,14 +13,8 @@ import DateParser public final class RSSParser { - 
private var parserData: ParserData - private var feedURL: String { - parserData.url - } - private var data: Data { - parserData.data - } - + private let feedURL: String + private let data: Data private let feed: RSSFeed private var articles = [RSSArticle]() private var currentArticle: RSSArticle? { @@ -34,16 +28,17 @@ public final class RSSParser { private var parsingAuthor = false private var currentAttributes: StringDictionary? - static func parsedFeed(with parserData: ParserData) -> RSSFeed { + static func parsedFeed(urlString: String, data: Data) -> RSSFeed { - let parser = RSSParser(parserData) + let parser = RSSParser(urlString: urlString, data: data) parser.parse() return parser.feed } - init(_ parserData: ParserData) { - self.parserData = parserData - self.feed = RSSFeed(urlString: parserData.url) + init(urlString: String, data: Data) { + self.feedURL = urlString + self.data = data + self.feed = RSSFeed(urlString: urlString) } } diff --git a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift index 21c345935..2474b286e 100644 --- a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift @@ -7,6 +7,7 @@ // import XCTest +import SAX import FeedParser final class RSSInJSONParserTests: XCTestCase { @@ -26,3 +27,11 @@ final class RSSInJSONParserTests: XCTestCase { XCTAssertEqual(parsedFeed.language, "en-us") } } + +extension FeedParser { + + static func parse(_ parserData: ParserData) throws -> ParsedFeed? { + + try FeedParser.parse(urlString: parserData.url, data: parserData.data) + } +} From fee2b3af900df2051715da80e349a237f835d63e Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 22:15:37 -0700 Subject: [PATCH 84/88] Get LocalAccount module building with new Parser module. 
--- Modules/LocalAccount/Package.swift | 2 +- .../Sources/LocalAccount/InitialFeedDownloader.swift | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Modules/LocalAccount/Package.swift b/Modules/LocalAccount/Package.swift index f35444936..ff06f3c41 100644 --- a/Modules/LocalAccount/Package.swift +++ b/Modules/LocalAccount/Package.swift @@ -18,7 +18,7 @@ let package = Package( .target( name: "LocalAccount", dependencies: [ - "Parser", + .product(name: "FeedParser", package: "Parser"), "Web" ], swiftSettings: [ diff --git a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift index 142833b02..b9df46c72 100644 --- a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift +++ b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift @@ -7,7 +7,7 @@ // import Foundation -import Parser +import FeedParser import Web public struct InitialFeedDownloader { @@ -22,8 +22,7 @@ public struct InitialFeedDownloader { return nil } - let parserData = ParserData(url: url.absoluteString, data: data) - guard let parsedFeed = try? await FeedParser.parse(parserData) else { + guard let parsedFeed = try? FeedParser.parse(urlString: url.absoluteString, data: data) else { return nil } From c0dd6aac0ddce732ff1776a76af5ab219ca17f91 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Mon, 23 Sep 2024 22:19:21 -0700 Subject: [PATCH 85/88] Get NewsBlur building. 
--- .../xcode/xcshareddata/xcschemes/NewsBlur.xcscheme | 12 ++++++++++++ Modules/NewsBlur/Package.swift | 2 -- .../Sources/NewsBlur/Models/NewsBlurFeed.swift | 1 - .../Sources/NewsBlur/Models/NewsBlurStory.swift | 1 - .../Sources/NewsBlur/Models/NewsBlurStoryHash.swift | 1 - 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme b/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme index a99b3d79f..b8cfb4d23 100644 --- a/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme +++ b/Modules/NewsBlur/.swiftpm/xcode/xcshareddata/xcschemes/NewsBlur.xcscheme @@ -29,6 +29,18 @@ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB" shouldUseLaunchSchemeArgsEnv = "YES" shouldAutocreateTestPlan = "YES"> + + + + + + Date: Mon, 23 Sep 2024 22:21:19 -0700 Subject: [PATCH 86/88] Get Feedly building. --- Modules/Feedly/Package.swift | 2 +- Modules/Feedly/Sources/Feedly/FeedlyModel.swift | 2 +- Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Modules/Feedly/Package.swift b/Modules/Feedly/Package.swift index dbe60617a..80db9dc0c 100644 --- a/Modules/Feedly/Package.swift +++ b/Modules/Feedly/Package.swift @@ -22,7 +22,7 @@ let package = Package( .target( name: "Feedly", dependencies: [ - "Parser", + .product(name: "FeedParser", package: "Parser"), "Articles", "Secrets", "Core", diff --git a/Modules/Feedly/Sources/Feedly/FeedlyModel.swift b/Modules/Feedly/Sources/Feedly/FeedlyModel.swift index 265830848..8a692941f 100644 --- a/Modules/Feedly/Sources/Feedly/FeedlyModel.swift +++ b/Modules/Feedly/Sources/Feedly/FeedlyModel.swift @@ -8,7 +8,7 @@ import Foundation import Articles -import Parser +import FeedParser public struct FeedlyCategory: Decodable, Sendable, Equatable { diff --git a/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift 
b/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift index cae0b4f5c..fe67be899 100644 --- a/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift +++ b/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift @@ -6,7 +6,7 @@ // import Foundation -import Parser +import FeedParser public final class FeedlyUtilities { From 591e451b69eed2434e60d5f9eddb45fcb86c6ed4 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Tue, 24 Sep 2024 14:45:53 -0700 Subject: [PATCH 87/88] =?UTF-8?q?Simplify=20Parser=20=E2=80=94=C2=A0use=20?= =?UTF-8?q?one=20target=20instead=20of=20multiple.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../xcschemes/OPMLParser.xcscheme | 67 ------------------- .../xcode/xcshareddata/xcschemes/SAX.xcscheme | 67 ------------------- Modules/Parser/Package.swift | 60 ++--------------- .../{ => Parser}/DateParser/DateParser.swift | 0 .../FeedParser/Feeds/FeedParser.swift | 0 .../FeedParser/Feeds/FeedParserError.swift | 0 .../FeedParser/Feeds/FeedType.swift | 1 - .../Feeds/JSON/JSONFeedParser.swift | 2 - .../Feeds/JSON/RSSInJSONParser.swift | 2 - .../FeedParser/Feeds/ParsedAttachment.swift | 0 .../FeedParser/Feeds/ParsedAuthor.swift | 0 .../FeedParser/Feeds/ParsedFeed.swift | 0 .../FeedParser/Feeds/ParsedHub.swift | 0 .../FeedParser/Feeds/ParsedItem.swift | 0 .../FeedParser/Feeds/XML/AtomParser.swift | 2 - .../FeedParser/Feeds/XML/RSSArticle.swift | 0 .../FeedParser/Feeds/XML/RSSAuthor.swift | 0 .../FeedParser/Feeds/XML/RSSEnclosure.swift | 0 .../FeedParser/Feeds/XML/RSSFeed.swift | 0 .../Feeds/XML/RSSFeedTransformer.swift | 0 .../FeedParser/Feeds/XML/RSSParser.swift | 2 - .../FeedParser/JSON/JSONTypes.swift | 0 .../FeedParser/JSON/JSONUtilities.swift | 0 .../{ => Parser}/HTMLParser/HTMLLink.swift | 0 .../HTMLParser/HTMLLinkParser.swift | 1 - .../HTMLParser/HTMLMetadata.swift | 1 - .../HTMLParser/HTMLMetadataParser.swift | 1 - .../{ => Parser}/HTMLParser/HTMLTag.swift | 0 .../OPMLParser/OPMLAttributes.swift | 0 
.../OPMLParser/OPMLDocument.swift | 0 .../OPMLParser/OPMLFeedSpecifier.swift | 1 - .../{ => Parser}/OPMLParser/OPMLItem.swift | 0 .../{ => Parser}/OPMLParser/OPMLParser.swift | 1 - .../SAX/Extensions/Data+SAX.swift | 0 .../SAX/Extensions/Dictionary+SAX.swift | 0 .../SAX/Extensions/String+SAX.swift | 0 .../{ => Parser}/SAX/HTMLEntityDecoder.swift | 0 .../Sources/{ => Parser}/SAX/ParserData.swift | 0 .../{ => Parser}/SAX/SAXHTMLParser.swift | 0 .../Sources/{ => Parser}/SAX/SAXParser.swift | 0 .../{ => Parser}/SAX/SAXUtilities.swift | 0 .../Tests/ParserTests/AtomParserTests.swift | 2 +- .../Tests/ParserTests/DateParserTests.swift | 2 +- .../ParserTests/EntityDecodingTests.swift | 2 +- .../ParserTests/FeedParserTypeTests.swift | 3 +- .../Tests/ParserTests/HTMLLinkTests.swift | 3 +- .../Tests/ParserTests/HTMLMetadataTests.swift | 2 +- .../ParserTests/JSONFeedParserTests.swift | 2 +- .../Parser/Tests/ParserTests/OPMLTests.swift | 3 +- .../ParserTests/RSSInJSONParserTests.swift | 3 +- .../Tests/ParserTests/RSSParserTests.swift | 2 +- 51 files changed, 14 insertions(+), 218 deletions(-) delete mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme delete mode 100644 Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme rename Modules/Parser/Sources/{ => Parser}/DateParser/DateParser.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/FeedParser.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/FeedParserError.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/FeedType.swift (99%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/JSON/JSONFeedParser.swift (99%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/JSON/RSSInJSONParser.swift (99%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/ParsedAttachment.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/ParsedAuthor.swift (100%) rename Modules/Parser/Sources/{ 
=> Parser}/FeedParser/Feeds/ParsedFeed.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/ParsedHub.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/ParsedItem.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/XML/AtomParser.swift (99%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/XML/RSSArticle.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/XML/RSSAuthor.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/XML/RSSEnclosure.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/XML/RSSFeed.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/XML/RSSFeedTransformer.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/Feeds/XML/RSSParser.swift (99%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/JSON/JSONTypes.swift (100%) rename Modules/Parser/Sources/{ => Parser}/FeedParser/JSON/JSONUtilities.swift (100%) rename Modules/Parser/Sources/{ => Parser}/HTMLParser/HTMLLink.swift (100%) rename Modules/Parser/Sources/{ => Parser}/HTMLParser/HTMLLinkParser.swift (99%) rename Modules/Parser/Sources/{ => Parser}/HTMLParser/HTMLMetadata.swift (99%) rename Modules/Parser/Sources/{ => Parser}/HTMLParser/HTMLMetadataParser.swift (99%) rename Modules/Parser/Sources/{ => Parser}/HTMLParser/HTMLTag.swift (100%) rename Modules/Parser/Sources/{ => Parser}/OPMLParser/OPMLAttributes.swift (100%) rename Modules/Parser/Sources/{ => Parser}/OPMLParser/OPMLDocument.swift (100%) rename Modules/Parser/Sources/{ => Parser}/OPMLParser/OPMLFeedSpecifier.swift (98%) rename Modules/Parser/Sources/{ => Parser}/OPMLParser/OPMLItem.swift (100%) rename Modules/Parser/Sources/{ => Parser}/OPMLParser/OPMLParser.swift (99%) rename Modules/Parser/Sources/{ => Parser}/SAX/Extensions/Data+SAX.swift (100%) rename Modules/Parser/Sources/{ => Parser}/SAX/Extensions/Dictionary+SAX.swift (100%) rename Modules/Parser/Sources/{ 
=> Parser}/SAX/Extensions/String+SAX.swift (100%) rename Modules/Parser/Sources/{ => Parser}/SAX/HTMLEntityDecoder.swift (100%) rename Modules/Parser/Sources/{ => Parser}/SAX/ParserData.swift (100%) rename Modules/Parser/Sources/{ => Parser}/SAX/SAXHTMLParser.swift (100%) rename Modules/Parser/Sources/{ => Parser}/SAX/SAXParser.swift (100%) rename Modules/Parser/Sources/{ => Parser}/SAX/SAXUtilities.swift (100%) diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme deleted file mode 100644 index 218cca92a..000000000 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/OPMLParser.xcscheme +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme b/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme deleted file mode 100644 index 0267c802a..000000000 --- a/Modules/Parser/.swiftpm/xcode/xcshareddata/xcschemes/SAX.xcscheme +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/Modules/Parser/Package.swift b/Modules/Parser/Package.swift index 9e749f804..e9aaf44ff 100644 --- a/Modules/Parser/Package.swift +++ b/Modules/Parser/Package.swift @@ -9,25 +9,9 @@ let package = Package( products: [ // Products define the executables and libraries a package produces, and make them visible to other packages. 
.library( - name: "FeedParser", + name: "Parser", type: .dynamic, - targets: ["FeedParser"]), - .library( - name: "SAX", - type: .dynamic, - targets: ["SAX"]), - .library( - name: "OPMLParser", - type: .dynamic, - targets: ["OPMLParser"]), - .library( - name: "HTMLParser", - type: .dynamic, - targets: ["HTMLParser"]), - .library( - name: "DateParser", - type: .dynamic, - targets: ["DateParser"]) + targets: ["Parser"]) ], dependencies: [ .package(path: "../FoundationExtras"), @@ -36,53 +20,17 @@ let package = Package( // Targets are the basic building blocks of a package. A target can define a module or a test suite. // Targets can depend on other targets in this package, and on products in packages this package depends on. .target( - name: "OPMLParser", - dependencies: [ - "SAX" - ], - swiftSettings: [ - .enableExperimentalFeature("StrictConcurrency") - ]), - .target( - name: "HTMLParser", + name: "Parser", dependencies: [ "FoundationExtras", - "SAX" ], swiftSettings: [ .enableExperimentalFeature("StrictConcurrency") ]), - .target( - name: "FeedParser", - dependencies: [ - "SAX", - "FoundationExtras", - "DateParser" - ], - swiftSettings: [ - .enableExperimentalFeature("StrictConcurrency") - ]), - .target( - name: "SAX", - dependencies: [ - "FoundationExtras" - ], - swiftSettings: [ - .enableExperimentalFeature("StrictConcurrency") - ]), - .target( - name: "DateParser", - dependencies: [], - swiftSettings: [ - .enableExperimentalFeature("StrictConcurrency") - ]), .testTarget( name: "ParserTests", dependencies: [ - "FeedParser", - "OPMLParser", - "DateParser", - "HTMLParser" + "Parser" ], exclude: ["Info.plist"], resources: [.copy("Resources")]), diff --git a/Modules/Parser/Sources/DateParser/DateParser.swift b/Modules/Parser/Sources/Parser/DateParser/DateParser.swift similarity index 100% rename from Modules/Parser/Sources/DateParser/DateParser.swift rename to Modules/Parser/Sources/Parser/DateParser/DateParser.swift diff --git 
a/Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/FeedParser.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedParserError.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParserError.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/FeedParserError.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParserError.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift similarity index 99% rename from Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift index f26cd1ab2..21d70539d 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/FeedType.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift @@ -7,7 +7,6 @@ // import Foundation -import SAX public enum FeedType: Sendable { diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/JSONFeedParser.swift similarity index 99% rename from Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/JSONFeedParser.swift index 488a1db4c..a8665af2e 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/JSONFeedParser.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/JSONFeedParser.swift @@ -7,8 +7,6 @@ // import Foundation -import SAX -import DateParser // See https://jsonfeed.org/version/1.1 diff --git a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/RSSInJSONParser.swift similarity index 99% rename from 
Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/RSSInJSONParser.swift index 6e7feca1f..39f9623a7 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/JSON/RSSInJSONParser.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/JSON/RSSInJSONParser.swift @@ -7,8 +7,6 @@ // import Foundation -import SAX -import DateParser // See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md // Also: http://cyber.harvard.edu/rss/rss.html diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAttachment.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/ParsedAttachment.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAttachment.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAuthor.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/ParsedAuthor.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedAuthor.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedFeed.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/ParsedFeed.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedFeed.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedHub.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/ParsedHub.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedHub.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedItem.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/ParsedItem.swift 
rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/ParsedItem.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/AtomParser.swift similarity index 99% rename from Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/AtomParser.swift index a77015c3f..ef7002390 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/AtomParser.swift @@ -8,8 +8,6 @@ import Foundation import FoundationExtras -import SAX -import DateParser final class AtomParser { diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSArticle.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSArticle.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/XML/RSSArticle.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSArticle.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSAuthor.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/XML/RSSAuthor.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSAuthor.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSEnclosure.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSEnclosure.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/XML/RSSEnclosure.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSEnclosure.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeed.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeed.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeed.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeed.swift diff --git 
a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeedTransformer.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/Feeds/XML/RSSFeedTransformer.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSFeedTransformer.swift diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSParser.swift similarity index 99% rename from Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift rename to Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSParser.swift index 341b0f702..75ead7108 100644 --- a/Modules/Parser/Sources/FeedParser/Feeds/XML/RSSParser.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/XML/RSSParser.swift @@ -8,8 +8,6 @@ import Foundation import FoundationExtras -import SAX -import DateParser public final class RSSParser { diff --git a/Modules/Parser/Sources/FeedParser/JSON/JSONTypes.swift b/Modules/Parser/Sources/Parser/FeedParser/JSON/JSONTypes.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/JSON/JSONTypes.swift rename to Modules/Parser/Sources/Parser/FeedParser/JSON/JSONTypes.swift diff --git a/Modules/Parser/Sources/FeedParser/JSON/JSONUtilities.swift b/Modules/Parser/Sources/Parser/FeedParser/JSON/JSONUtilities.swift similarity index 100% rename from Modules/Parser/Sources/FeedParser/JSON/JSONUtilities.swift rename to Modules/Parser/Sources/Parser/FeedParser/JSON/JSONUtilities.swift diff --git a/Modules/Parser/Sources/HTMLParser/HTMLLink.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLLink.swift similarity index 100% rename from Modules/Parser/Sources/HTMLParser/HTMLLink.swift rename to Modules/Parser/Sources/Parser/HTMLParser/HTMLLink.swift diff --git a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift similarity index 99% rename from 
Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift rename to Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift index 00994fd63..d084860d4 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLLinkParser.swift +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLLinkParser.swift @@ -7,7 +7,6 @@ import Foundation import FoundationExtras -import SAX public final class HTMLLinkParser { diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift similarity index 99% rename from Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift rename to Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift index bf804d760..651fd7a58 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadata.swift @@ -6,7 +6,6 @@ // import Foundation -import SAX public final class HTMLMetadata { diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadataParser.swift similarity index 99% rename from Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift rename to Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadataParser.swift index ab96da818..fe317067a 100644 --- a/Modules/Parser/Sources/HTMLParser/HTMLMetadataParser.swift +++ b/Modules/Parser/Sources/Parser/HTMLParser/HTMLMetadataParser.swift @@ -7,7 +7,6 @@ import Foundation import FoundationExtras -import SAX public final class HTMLMetadataParser { diff --git a/Modules/Parser/Sources/HTMLParser/HTMLTag.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLTag.swift similarity index 100% rename from Modules/Parser/Sources/HTMLParser/HTMLTag.swift rename to Modules/Parser/Sources/Parser/HTMLParser/HTMLTag.swift diff --git a/Modules/Parser/Sources/OPMLParser/OPMLAttributes.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLAttributes.swift similarity index 100% rename from Modules/Parser/Sources/OPMLParser/OPMLAttributes.swift 
rename to Modules/Parser/Sources/Parser/OPMLParser/OPMLAttributes.swift diff --git a/Modules/Parser/Sources/OPMLParser/OPMLDocument.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLDocument.swift similarity index 100% rename from Modules/Parser/Sources/OPMLParser/OPMLDocument.swift rename to Modules/Parser/Sources/Parser/OPMLParser/OPMLDocument.swift diff --git a/Modules/Parser/Sources/OPMLParser/OPMLFeedSpecifier.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift similarity index 98% rename from Modules/Parser/Sources/OPMLParser/OPMLFeedSpecifier.swift rename to Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift index d22f566b1..2b5e43856 100644 --- a/Modules/Parser/Sources/OPMLParser/OPMLFeedSpecifier.swift +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift @@ -6,7 +6,6 @@ // import Foundation -import SAX public struct OPMLFeedSpecifier: Sendable { diff --git a/Modules/Parser/Sources/OPMLParser/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift similarity index 100% rename from Modules/Parser/Sources/OPMLParser/OPMLItem.swift rename to Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift diff --git a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift similarity index 99% rename from Modules/Parser/Sources/OPMLParser/OPMLParser.swift rename to Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift index ab21c4185..4ec38d5b1 100644 --- a/Modules/Parser/Sources/OPMLParser/OPMLParser.swift +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLParser.swift @@ -6,7 +6,6 @@ // import Foundation -import SAX public final class OPMLParser { diff --git a/Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift b/Modules/Parser/Sources/Parser/SAX/Extensions/Data+SAX.swift similarity index 100% rename from Modules/Parser/Sources/SAX/Extensions/Data+SAX.swift rename to Modules/Parser/Sources/Parser/SAX/Extensions/Data+SAX.swift diff --git 
a/Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift b/Modules/Parser/Sources/Parser/SAX/Extensions/Dictionary+SAX.swift similarity index 100% rename from Modules/Parser/Sources/SAX/Extensions/Dictionary+SAX.swift rename to Modules/Parser/Sources/Parser/SAX/Extensions/Dictionary+SAX.swift diff --git a/Modules/Parser/Sources/SAX/Extensions/String+SAX.swift b/Modules/Parser/Sources/Parser/SAX/Extensions/String+SAX.swift similarity index 100% rename from Modules/Parser/Sources/SAX/Extensions/String+SAX.swift rename to Modules/Parser/Sources/Parser/SAX/Extensions/String+SAX.swift diff --git a/Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/Parser/SAX/HTMLEntityDecoder.swift similarity index 100% rename from Modules/Parser/Sources/SAX/HTMLEntityDecoder.swift rename to Modules/Parser/Sources/Parser/SAX/HTMLEntityDecoder.swift diff --git a/Modules/Parser/Sources/SAX/ParserData.swift b/Modules/Parser/Sources/Parser/SAX/ParserData.swift similarity index 100% rename from Modules/Parser/Sources/SAX/ParserData.swift rename to Modules/Parser/Sources/Parser/SAX/ParserData.swift diff --git a/Modules/Parser/Sources/SAX/SAXHTMLParser.swift b/Modules/Parser/Sources/Parser/SAX/SAXHTMLParser.swift similarity index 100% rename from Modules/Parser/Sources/SAX/SAXHTMLParser.swift rename to Modules/Parser/Sources/Parser/SAX/SAXHTMLParser.swift diff --git a/Modules/Parser/Sources/SAX/SAXParser.swift b/Modules/Parser/Sources/Parser/SAX/SAXParser.swift similarity index 100% rename from Modules/Parser/Sources/SAX/SAXParser.swift rename to Modules/Parser/Sources/Parser/SAX/SAXParser.swift diff --git a/Modules/Parser/Sources/SAX/SAXUtilities.swift b/Modules/Parser/Sources/Parser/SAX/SAXUtilities.swift similarity index 100% rename from Modules/Parser/Sources/SAX/SAXUtilities.swift rename to Modules/Parser/Sources/Parser/SAX/SAXUtilities.swift diff --git a/Modules/Parser/Tests/ParserTests/AtomParserTests.swift 
b/Modules/Parser/Tests/ParserTests/AtomParserTests.swift index eb2f738ed..94fd6cbe0 100644 --- a/Modules/Parser/Tests/ParserTests/AtomParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/AtomParserTests.swift @@ -7,7 +7,7 @@ // import XCTest -import FeedParser +import Parser final class AtomParserTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/DateParserTests.swift b/Modules/Parser/Tests/ParserTests/DateParserTests.swift index 3861e15f4..4f03febbd 100644 --- a/Modules/Parser/Tests/ParserTests/DateParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/DateParserTests.swift @@ -7,7 +7,7 @@ import Foundation import XCTest -@testable import DateParser +@testable import Parser final class DateParserTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift b/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift index 292b6b183..db8112cf3 100644 --- a/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift +++ b/Modules/Parser/Tests/ParserTests/EntityDecodingTests.swift @@ -7,7 +7,7 @@ // import XCTest -import SAX +import Parser final class EntityDecodingTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift index 8600936e2..3649e5acd 100644 --- a/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift +++ b/Modules/Parser/Tests/ParserTests/FeedParserTypeTests.swift @@ -7,8 +7,7 @@ // import XCTest -@testable import FeedParser -import SAX +@testable import Parser final class FeedParserTypeTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift index cdc8834d6..7e7c69acc 100644 --- a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift @@ -7,8 +7,7 @@ // import XCTest -import HTMLParser -import libxml2 +import Parser final class HTMLLinkTests: XCTestCase { diff --git 
a/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift b/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift index 4133a9a8c..6adc5de7f 100644 --- a/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift +++ b/Modules/Parser/Tests/ParserTests/HTMLMetadataTests.swift @@ -7,7 +7,7 @@ // import XCTest -import HTMLParser +import Parser final class HTMLMetadataTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift b/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift index 3e605ed16..f96dde79e 100644 --- a/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/JSONFeedParserTests.swift @@ -7,7 +7,7 @@ // import XCTest -import FeedParser +import Parser final class JSONFeedParserTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/OPMLTests.swift b/Modules/Parser/Tests/ParserTests/OPMLTests.swift index 4e8400e56..563cd149e 100644 --- a/Modules/Parser/Tests/ParserTests/OPMLTests.swift +++ b/Modules/Parser/Tests/ParserTests/OPMLTests.swift @@ -7,8 +7,7 @@ // import XCTest -import SAX -@testable import OPMLParser +@testable import Parser final class OPMLTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift index 2474b286e..240076af2 100644 --- a/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/RSSInJSONParserTests.swift @@ -7,8 +7,7 @@ // import XCTest -import SAX -import FeedParser +import Parser final class RSSInJSONParserTests: XCTestCase { diff --git a/Modules/Parser/Tests/ParserTests/RSSParserTests.swift b/Modules/Parser/Tests/ParserTests/RSSParserTests.swift index 25f71fad3..62f9d0294 100644 --- a/Modules/Parser/Tests/ParserTests/RSSParserTests.swift +++ b/Modules/Parser/Tests/ParserTests/RSSParserTests.swift @@ -7,7 +7,7 @@ // import XCTest -import FeedParser +import Parser final class RSSParserTests: 
XCTestCase { From 4b5694fe743e7b63aa1396ad86ae42de00bd4320 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Tue, 24 Sep 2024 22:31:21 -0700 Subject: [PATCH 88/88] Fix build errors. --- Modules/Account/Sources/Account/Account.swift | 8 +- .../CloudKitAccountDelegate.swift | 4 +- .../LocalAccountDelegate.swift | 8 +- .../Account/Sources/Account/OPMLFile.swift | 16 +-- .../Sources/Account/OPMLNormalizer.swift | 16 +-- .../ArticlesDatabase/SearchTable.swift | 2 +- .../CloudKitSync/CloudKitAccountZone.swift | 8 +- .../Sources/FeedFinder/FeedFinder.swift | 9 +- .../Sources/FeedFinder/HTMLFeedFinder.swift | 18 ++-- .../Sources/Feedbin/FeedbinEntry.swift | 2 +- Modules/Feedly/Package.swift | 2 +- .../Feedly/Sources/Feedly/FeedlyModel.swift | 2 +- .../Sources/Feedly/FeedlyUtilities.swift | 2 +- .../Images/Favicons/FaviconDownloader.swift | 3 +- .../Images/Favicons/FaviconURLFinder.swift | 6 +- .../Images/FeaturedImageDownloader.swift | 2 +- .../Sources/Images/FeedIconDownloader.swift | 6 +- .../Images/HTMLMetadata+Extension.swift | 67 +++++++++++++ .../Images/RSHTMLMetadata+Extension.swift | 98 ------------------- Modules/LocalAccount/Package.swift | 2 +- .../LocalAccount/InitialFeedDownloader.swift | 2 +- .../Parser/FeedParser/Feeds/FeedParser.swift | 5 + .../HTMLEntityDecoder.swift | 0 .../Parser/OPMLParser/OPMLFeedSpecifier.swift | 8 +- .../Sources/Parser/OPMLParser/OPMLItem.swift | 2 +- Shared/AppDelegate+Shared.swift | 3 +- .../Extensions/ArticleStringFormatter.swift | 5 +- .../NSAttributedString+NetNewsWire.swift | 2 +- .../HTMLMetadata/HTMLMetadataDownloader.swift | 10 +- 29 files changed, 143 insertions(+), 175 deletions(-) create mode 100644 Modules/Images/Sources/Images/HTMLMetadata+Extension.swift delete mode 100644 Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift rename Modules/Parser/Sources/Parser/{SAX => HTMLParser}/HTMLEntityDecoder.swift (100%) diff --git a/Modules/Account/Sources/Account/Account.swift 
b/Modules/Account/Sources/Account/Account.swift index efc42748b..f6343c755 100644 --- a/Modules/Account/Sources/Account/Account.swift +++ b/Modules/Account/Sources/Account/Account.swift @@ -464,14 +464,14 @@ public enum FetchType { delegate.accountWillBeDeleted(self) } - func addOPMLItems(_ items: [RSOPMLItem]) { + func addOPMLItems(_ items: [OPMLItem]) { for item in items { if let feedSpecifier = item.feedSpecifier { addFeed(newFeed(with: feedSpecifier)) } else { if let title = item.titleFromAttributes, let folder = ensureFolder(with: title) { folder.externalID = item.attributes?["nnw_externalID"] as? String - if let children = item.children { + if let children = item.items { for itemChild in children { if let feedSpecifier = itemChild.feedSpecifier { folder.addFeed(newFeed(with: feedSpecifier)) @@ -483,7 +483,7 @@ public enum FetchType { } } - func loadOPMLItems(_ items: [RSOPMLItem]) { + func loadOPMLItems(_ items: [OPMLItem]) { addOPMLItems(OPMLNormalizer.normalize(items)) } @@ -560,7 +560,7 @@ public enum FetchType { return folders?.first(where: { $0.externalID == externalID }) } - func newFeed(with opmlFeedSpecifier: RSOPMLFeedSpecifier) -> Feed { + func newFeed(with opmlFeedSpecifier: OPMLFeedSpecifier) -> Feed { let feedURL = opmlFeedSpecifier.feedURL let metadata = feedMetadata(feedURL: feedURL, feedID: feedURL) let feed = Feed(account: self, url: opmlFeedSpecifier.feedURL, metadata: metadata) diff --git a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift index 544a30e93..e65cc424a 100644 --- a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift @@ -175,9 +175,9 @@ enum CloudKitAccountDelegateError: LocalizedError { let opmlData = try Data(contentsOf: opmlFile) let parserData = ParserData(url: opmlFile.absoluteString, data: opmlData) - 
let opmlDocument = try RSOPMLParser.parseOPML(with: parserData) + let opmlDocument = OPMLParser.document(with: parserData) - guard let opmlItems = opmlDocument.children, let rootExternalID = account.externalID else { + guard let opmlItems = opmlDocument?.items, let rootExternalID = account.externalID else { return } diff --git a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift index 00a9b05f0..1695d12c7 100644 --- a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift @@ -77,8 +77,8 @@ final class LocalAccountDelegate: AccountDelegate { let opmlData = try Data(contentsOf: opmlFile) let parserData = ParserData(url: opmlFile.absoluteString, data: opmlData) - let opmlDocument = try RSOPMLParser.parseOPML(with: parserData) - guard let children = opmlDocument.children else { + let opmlDocument = OPMLParser.document(with: parserData) + guard let children = opmlDocument?.items else { return } @@ -267,9 +267,7 @@ private extension LocalAccountDelegate { return } - let parserData = ParserData(url: feed.url, data: data) - - guard let parsedFeed = try? await FeedParser.parse(parserData) else { + guard let parsedFeed = try? await FeedParser.parseAsync(urlString: feed.url, data: data) else { return } diff --git a/Modules/Account/Sources/Account/OPMLFile.swift b/Modules/Account/Sources/Account/OPMLFile.swift index a8fe0593f..39221856e 100644 --- a/Modules/Account/Sources/Account/OPMLFile.swift +++ b/Modules/Account/Sources/Account/OPMLFile.swift @@ -33,7 +33,7 @@ import Core dataFile.markAsDirty() } - func opmlItems() -> [RSOPMLItem]? { + func opmlItems() -> [OPMLItem]? { guard let fileData = opmlFileData() else { return nil } @@ -61,18 +61,10 @@ private extension OPMLFile { return fileData } - func parsedOPMLItems(fileData: Data) -> [RSOPMLItem]? 
{ + func parsedOPMLItems(fileData: Data) -> [OPMLItem]? { let parserData = ParserData(url: fileURL.absoluteString, data: fileData) - var opmlDocument: RSOPMLDocument? - - do { - opmlDocument = try RSOPMLParser.parseOPML(with: parserData) - } catch { - logger.error("OPML Import failed for \(self.fileURL): \(error.localizedDescription)") - return nil - } - - return opmlDocument?.children + let opmlDocument = OPMLParser.document(with: parserData) + return opmlDocument?.items } func opmlDocument() -> String { diff --git a/Modules/Account/Sources/Account/OPMLNormalizer.swift b/Modules/Account/Sources/Account/OPMLNormalizer.swift index ab6111f89..2d5de573f 100644 --- a/Modules/Account/Sources/Account/OPMLNormalizer.swift +++ b/Modules/Account/Sources/Account/OPMLNormalizer.swift @@ -11,16 +11,16 @@ import Parser final class OPMLNormalizer { - var normalizedOPMLItems = [RSOPMLItem]() + var normalizedOPMLItems = [OPMLItem]() - static func normalize(_ items: [RSOPMLItem]) -> [RSOPMLItem] { + static func normalize(_ items: [OPMLItem]) -> [OPMLItem] { let opmlNormalizer = OPMLNormalizer() opmlNormalizer.normalize(items) return opmlNormalizer.normalizedOPMLItems } - private func normalize(_ items: [RSOPMLItem], parentFolder: RSOPMLItem? = nil) { - var feedsToAdd = [RSOPMLItem]() + private func normalize(_ items: [OPMLItem], parentFolder: OPMLItem? = nil) { + var feedsToAdd = [OPMLItem]() for item in items { @@ -33,14 +33,14 @@ final class OPMLNormalizer { guard let _ = item.titleFromAttributes else { // Folder doesn’t have a name, so it won’t be created, and its items will go one level up. 
- if let itemChildren = item.children { + if let itemChildren = item.items { normalize(itemChildren, parentFolder: parentFolder) } continue } feedsToAdd.append(item) - if let itemChildren = item.children { + if let itemChildren = item.items { if let parentFolder = parentFolder { normalize(itemChildren, parentFolder: parentFolder) } else { @@ -51,8 +51,8 @@ final class OPMLNormalizer { if let parentFolder = parentFolder { for feed in feedsToAdd { - if !(parentFolder.children?.contains(where: { $0.feedSpecifier?.feedURL == feed.feedSpecifier?.feedURL}) ?? false) { - parentFolder.addChild(feed) + if !(parentFolder.items?.contains(where: { $0.feedSpecifier?.feedURL == feed.feedSpecifier?.feedURL}) ?? false) { + parentFolder.add(feed) } } } else { diff --git a/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift b/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift index fa78de31c..b96fbf432 100644 --- a/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift +++ b/Modules/ArticlesDatabase/Sources/ArticlesDatabase/SearchTable.swift @@ -33,7 +33,7 @@ final class ArticleSearchInfo: Hashable { } lazy var bodyForIndex: String = { - let s = preferredText.rsparser_stringByDecodingHTMLEntities() + let s = HTMLEntityDecoder.decodedString(preferredText) let sanitizedBody = s.strippingHTML().collapsingWhitespace if let authorsNames = authorsNames { diff --git a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift index e4a18cc51..fea5368d4 100644 --- a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift +++ b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift @@ -56,12 +56,12 @@ enum CloudKitAccountZoneError: LocalizedError { migrateChangeToken() } - public func importOPML(rootExternalID: String, items: [RSOPMLItem]) async throws { + public func importOPML(rootExternalID: String, items: [OPMLItem]) async throws { var 
records = [CKRecord]() var feedRecords = [String: CKRecord]() - func processFeed(feedSpecifier: RSOPMLFeedSpecifier, containerExternalID: String) { + func processFeed(feedSpecifier: OPMLFeedSpecifier, containerExternalID: String) { if let feedRecord = feedRecords[feedSpecifier.feedURL], var containerExternalIDs = feedRecord[CloudKitFeed.Fields.containerExternalIDs] as? [String] { containerExternalIDs.append(containerExternalID) feedRecord[CloudKitFeed.Fields.containerExternalIDs] = containerExternalIDs @@ -79,7 +79,7 @@ enum CloudKitAccountZoneError: LocalizedError { if let title = item.titleFromAttributes { let containerRecord = newContainerCKRecord(name: title) records.append(containerRecord) - item.children?.forEach { itemChild in + item.items?.forEach { itemChild in if let feedSpecifier = itemChild.feedSpecifier { processFeed(feedSpecifier: feedSpecifier, containerExternalID: containerRecord.externalID) } @@ -288,7 +288,7 @@ enum CloudKitAccountZoneError: LocalizedError { private extension CloudKitAccountZone { - func newFeedCKRecord(feedSpecifier: RSOPMLFeedSpecifier, containerExternalID: String) -> CKRecord { + func newFeedCKRecord(feedSpecifier: OPMLFeedSpecifier, containerExternalID: String) -> CKRecord { let record = CKRecord(recordType: CloudKitFeed.recordType, recordID: generateRecordID()) record[CloudKitFeed.Fields.url] = feedSpecifier.feedURL diff --git a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift index d2dd7cfc1..3d8503c5c 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift @@ -55,7 +55,7 @@ public final class FeedFinder { throw AccountError.createErrorNotFound } - if FeedFinder.isFeed(data, url.absoluteString) { + if FeedFinder.isFeed(data) { logger.info("FeedFinder: is feed \(url)") let feedSpecifier = FeedSpecifier(title: nil, urlString: url.absoluteString, source: .UserEntered, orderFound: 1) return 
Set([feedSpecifier]) @@ -156,7 +156,7 @@ private extension FeedFinder { if let downloadData = try? await DownloadWithCacheManager.shared.download(url) { if let data = downloadData.data, let response = downloadData.response, response.statusIsOK { - if isFeed(data, downloadFeedSpecifier.urlString) { + if isFeed(data) { addFeedSpecifier(downloadFeedSpecifier, feedSpecifiers: &resultFeedSpecifiers) } } @@ -166,8 +166,7 @@ private extension FeedFinder { return Set(resultFeedSpecifiers.values) } - static func isFeed(_ data: Data, _ urlString: String) -> Bool { - let parserData = ParserData(url: urlString, data: data) - return FeedParser.canParse(parserData) + static func isFeed(_ data: Data) -> Bool { + return FeedParser.canParse(data) } } diff --git a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift index 805bc047b..6c51c9b2f 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift @@ -21,18 +21,20 @@ class HTMLFeedFinder { private var feedSpecifiersDictionary = [String: FeedSpecifier]() init(parserData: ParserData) { - let metadata = RSHTMLMetadataParser.htmlMetadata(with: parserData) + let metadata = HTMLMetadataParser.metadata(with: parserData) var orderFound = 0 - for oneFeedLink in metadata.feedLinks { - if let oneURLString = oneFeedLink.urlString?.normalizedURL { - orderFound = orderFound + 1 - let oneFeedSpecifier = FeedSpecifier(title: oneFeedLink.title, urlString: oneURLString, source: .HTMLHead, orderFound: orderFound) - addFeedSpecifier(oneFeedSpecifier) + if let feedLinks = metadata.feedLinks { + for oneFeedLink in feedLinks { + if let oneURLString = oneFeedLink.urlString?.normalizedURL { + orderFound = orderFound + 1 + let oneFeedSpecifier = FeedSpecifier(title: oneFeedLink.title, urlString: oneURLString, source: .HTMLHead, orderFound: orderFound) + addFeedSpecifier(oneFeedSpecifier) + } } } - let bodyLinks = 
RSHTMLLinkParser.htmlLinks(with: parserData) + let bodyLinks = HTMLLinkParser.htmlLinks(with: parserData) for oneBodyLink in bodyLinks { if linkMightBeFeed(oneBodyLink), let normalizedURL = oneBodyLink.urlString?.normalizedURL { orderFound = orderFound + 1 @@ -70,7 +72,7 @@ private extension HTMLFeedFinder { return false } - func linkMightBeFeed(_ link: RSHTMLLink) -> Bool { + func linkMightBeFeed(_ link: HTMLLink) -> Bool { if let linkURLString = link.urlString, urlStringMightBeFeed(linkURLString) { return true } diff --git a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift index 03e90215a..a7675c585 100644 --- a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift +++ b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift @@ -28,7 +28,7 @@ public final class FeedbinEntry: Decodable, @unchecked Sendable { // and letting the one date fail when parsed. public lazy var parsedDatePublished: Date? = { if let datePublished = datePublished { - return RSDateWithString(datePublished) + return DateParser.date(string: datePublished) } else { return nil diff --git a/Modules/Feedly/Package.swift b/Modules/Feedly/Package.swift index 80db9dc0c..dbe60617a 100644 --- a/Modules/Feedly/Package.swift +++ b/Modules/Feedly/Package.swift @@ -22,7 +22,7 @@ let package = Package( .target( name: "Feedly", dependencies: [ - .product(name: "FeedParser", package: "Parser"), + "Parser", "Articles", "Secrets", "Core", diff --git a/Modules/Feedly/Sources/Feedly/FeedlyModel.swift b/Modules/Feedly/Sources/Feedly/FeedlyModel.swift index 8a692941f..265830848 100644 --- a/Modules/Feedly/Sources/Feedly/FeedlyModel.swift +++ b/Modules/Feedly/Sources/Feedly/FeedlyModel.swift @@ -8,7 +8,7 @@ import Foundation import Articles -import FeedParser +import Parser public struct FeedlyCategory: Decodable, Sendable, Equatable { diff --git a/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift b/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift index 
fe67be899..cae0b4f5c 100644 --- a/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift +++ b/Modules/Feedly/Sources/Feedly/FeedlyUtilities.swift @@ -6,7 +6,7 @@ // import Foundation -import FeedParser +import Parser public final class FeedlyUtilities { diff --git a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift index 4055b7b73..64ebf4b31 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift @@ -12,6 +12,7 @@ import Articles import Account import UniformTypeIdentifiers import Core +import Parser public extension Notification.Name { static let FaviconDidBecomeAvailable = Notification.Name("FaviconDidBecomeAvailableNotification") // userInfo key: FaviconDownloader.UserInfoKey.faviconURL @@ -21,7 +22,7 @@ public protocol FaviconDownloaderDelegate { @MainActor var appIconImage: IconImage? { get } - @MainActor func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata? + @MainActor func downloadMetadata(_ url: String) async throws -> HTMLMetadata? } @MainActor public final class FaviconDownloader { diff --git a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift index f959ff3b1..4c9689b18 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift @@ -22,7 +22,7 @@ import UniformTypeIdentifiers /// - Parameters: /// - homePageURL: The page to search. /// - urls: An array of favicon URLs as strings. - static func findFaviconURLs(with homePageURL: String, downloadMetadata: ((String) async throws -> RSHTMLMetadata?)) async -> [String]? { + static func findFaviconURLs(with homePageURL: String, downloadMetadata: ((String) async throws -> HTMLMetadata?)) async -> [String]? 
{ guard let _ = URL(string: homePageURL) else { return nil @@ -31,14 +31,14 @@ import UniformTypeIdentifiers // If the favicon has an explicit type, check that for an ignored type; otherwise, check the file extension. let htmlMetadata = try? await downloadMetadata(homePageURL) - let faviconURLs = htmlMetadata?.favicons.compactMap { favicon -> String? in + let faviconURLs = htmlMetadata?.favicons?.compactMap { favicon -> String? in shouldAllowFavicon(favicon) ? favicon.urlString : nil } return faviconURLs } - static func shouldAllowFavicon(_ favicon: RSHTMLMetadataFavicon) -> Bool { + static func shouldAllowFavicon(_ favicon: HTMLMetadataFavicon) -> Bool { // Check mime type. if let mimeType = favicon.type, let utType = UTType(mimeType: mimeType) { diff --git a/Modules/Images/Sources/Images/FeaturedImageDownloader.swift b/Modules/Images/Sources/Images/FeaturedImageDownloader.swift index 1746cde36..992c6902e 100644 --- a/Modules/Images/Sources/Images/FeaturedImageDownloader.swift +++ b/Modules/Images/Sources/Images/FeaturedImageDownloader.swift @@ -87,7 +87,7 @@ // } // } // -// func pullFeaturedImageURL(from metadata: RSHTMLMetadata, articleURL: String) { +// func pullFeaturedImageURL(from metadata: HTMLMetadata, articleURL: String) { // // if let url = metadata.bestFeaturedImageURL() { // cacheURL(for: articleURL, url) diff --git a/Modules/Images/Sources/Images/FeedIconDownloader.swift b/Modules/Images/Sources/Images/FeedIconDownloader.swift index 55a66363c..c833531ae 100644 --- a/Modules/Images/Sources/Images/FeedIconDownloader.swift +++ b/Modules/Images/Sources/Images/FeedIconDownloader.swift @@ -22,7 +22,7 @@ public protocol FeedIconDownloaderDelegate: Sendable { @MainActor var appIconImage: IconImage? { get } - func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata? + func downloadMetadata(_ url: String) async throws -> HTMLMetadata? 
} @MainActor public final class FeedIconDownloader { @@ -217,7 +217,7 @@ private extension FeedIconDownloader { homePageToIconURLCacheDirty = true } - func findIconURLForHomePageURL(_ homePageURL: String, feed: Feed, downloadMetadata: @escaping (String) async throws -> RSHTMLMetadata?) { + func findIconURLForHomePageURL(_ homePageURL: String, feed: Feed, downloadMetadata: @escaping (String) async throws -> HTMLMetadata?) { guard !urlsInProgress.contains(homePageURL) else { return @@ -236,7 +236,7 @@ private extension FeedIconDownloader { } } - func pullIconURL(from metadata: RSHTMLMetadata, homePageURL: String, feed: Feed) { + func pullIconURL(from metadata: HTMLMetadata, homePageURL: String, feed: Feed) { if let url = metadata.bestWebsiteIconURL() { cacheIconURL(for: homePageURL, url) diff --git a/Modules/Images/Sources/Images/HTMLMetadata+Extension.swift b/Modules/Images/Sources/Images/HTMLMetadata+Extension.swift new file mode 100644 index 000000000..188791b61 --- /dev/null +++ b/Modules/Images/Sources/Images/HTMLMetadata+Extension.swift @@ -0,0 +1,67 @@ +// +// HTMLMetadata+Extension.swift +// NetNewsWire +// +// Created by Brent Simmons on 11/26/17. +// Copyright © 2017 Ranchero Software. All rights reserved. +// + +import Foundation +import Parser + +extension HTMLMetadata { + + func largestAppleTouchIcon() -> String? { + + guard let icons = appleTouchIcons, !icons.isEmpty else { + return nil + } + + var bestImage: HTMLMetadataAppleTouchIcon? = nil + + for image in icons { + + guard let imageSize = image.size else { + continue + } + if imageSize.width / imageSize.height > 2 { + continue + } + + guard let currentBestImage = bestImage, let bestImageSize = currentBestImage.size else { + bestImage = image + continue + } + + if imageSize.height > bestImageSize.height && imageSize.width > bestImageSize.width { + bestImage = image + } + } + + return bestImage?.urlString ?? icons.first?.urlString + } + + func bestWebsiteIconURL() -> String? 
{ + + // TODO: metadata icons — sometimes they’re large enough to use here. + + if let appleTouchIcon = largestAppleTouchIcon() { + return appleTouchIcon + } + + if let openGraphImageURL = openGraphProperties?.image { + return openGraphImageURL.url + } + + return twitterProperties?.imageURL + } + + func bestFeaturedImageURL() -> String? { + + if let openGraphImageURL = openGraphProperties?.image { + return openGraphImageURL.url + } + + return twitterProperties?.imageURL + } +} diff --git a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift b/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift deleted file mode 100644 index fd5da45d7..000000000 --- a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift +++ /dev/null @@ -1,98 +0,0 @@ -// -// RSHTMLMetadata+Extension.swift -// NetNewsWire -// -// Created by Brent Simmons on 11/26/17. -// Copyright © 2017 Ranchero Software. All rights reserved. -// - -import Foundation -import Parser - -extension RSHTMLMetadata { - - func largestOpenGraphImageURL() -> String? { - let openGraphImages = openGraphProperties.images - - guard !openGraphImages.isEmpty else { - return nil - } - - var bestImage: RSHTMLOpenGraphImage? = nil - - for image in openGraphImages { - if image.width / image.height > 2 { - continue - } - if bestImage == nil { - bestImage = image - continue - } - if image.height > bestImage!.height && image.width > bestImage!.width { - bestImage = image - } - } - - guard let url = bestImage?.secureURL ?? bestImage?.url else { - return nil - } - - // Bad ones we should ignore. - let badURLs = Set(["https://s0.wp.com/i/blank.jpg"]) - guard !badURLs.contains(url) else { - return nil - } - - return url - } - - func largestAppleTouchIcon() -> String? { - - let icons = appleTouchIcons - - guard !icons.isEmpty else { - return nil - } - - var bestImage: RSHTMLMetadataAppleTouchIcon? 
= nil - - for image in icons { - if image.size.width / image.size.height > 2 { - continue - } - if bestImage == nil { - bestImage = image - continue - } - if image.size.height > bestImage!.size.height && image.size.width > bestImage!.size.width { - bestImage = image; - } - } - - return bestImage?.urlString - } - - func bestWebsiteIconURL() -> String? { - - // TODO: metadata icons — sometimes they’re large enough to use here. - - if let appleTouchIcon = largestAppleTouchIcon() { - return appleTouchIcon - } - - if let openGraphImageURL = largestOpenGraphImageURL() { - return openGraphImageURL - } - - return twitterProperties.imageURL - } - - func bestFeaturedImageURL() -> String? { - - if let openGraphImageURL = largestOpenGraphImageURL() { - return openGraphImageURL - } - - return twitterProperties.imageURL - } -} diff --git a/Modules/LocalAccount/Package.swift b/Modules/LocalAccount/Package.swift index ff06f3c41..f35444936 100644 --- a/Modules/LocalAccount/Package.swift +++ b/Modules/LocalAccount/Package.swift @@ -18,7 +18,7 @@ let package = Package( .target( name: "LocalAccount", dependencies: [ - .product(name: "FeedParser", package: "Parser"), + "Parser", "Web" ], swiftSettings: [ diff --git a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift index b9df46c72..a384fec4d 100644 --- a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift +++ b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift @@ -7,7 +7,7 @@ // import Foundation -import FeedParser +import Parser import Web public struct InitialFeedDownloader { diff --git a/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift index cc124c43b..ea91797bb 100644 --- a/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift +++ b/Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedParser.swift @@ -49,4 +49,9 @@ 
public struct FeedParser { return nil } } + + public static func parseAsync(urlString: String, data: Data) async throws -> ParsedFeed? { + + try parse(urlString: urlString, data: data) + } } diff --git a/Modules/Parser/Sources/Parser/SAX/HTMLEntityDecoder.swift b/Modules/Parser/Sources/Parser/HTMLParser/HTMLEntityDecoder.swift similarity index 100% rename from Modules/Parser/Sources/Parser/SAX/HTMLEntityDecoder.swift rename to Modules/Parser/Sources/Parser/HTMLParser/HTMLEntityDecoder.swift diff --git a/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift index 2b5e43856..a0cd12df6 100644 --- a/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLFeedSpecifier.swift @@ -9,10 +9,10 @@ import Foundation public struct OPMLFeedSpecifier: Sendable { - let title: String? - let feedDescription: String? - let homePageURL: String? - let feedURL: String + public let title: String? + public let feedDescription: String? + public let homePageURL: String? 
+ public let feedURL: String init(title: String?, feedDescription: String?, homePageURL: String?, feedURL: String) { diff --git a/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift b/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift index 2f0e972e9..57e4232dc 100644 --- a/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift +++ b/Modules/Parser/Sources/Parser/OPMLParser/OPMLItem.swift @@ -32,7 +32,7 @@ public class OPMLItem { } } - func add(_ item: OPMLItem) { + public func add(_ item: OPMLItem) { if items == nil { items = [OPMLItem]() diff --git a/Shared/AppDelegate+Shared.swift b/Shared/AppDelegate+Shared.swift index ebfe1a8d4..02484da2a 100644 --- a/Shared/AppDelegate+Shared.swift +++ b/Shared/AppDelegate+Shared.swift @@ -9,6 +9,7 @@ import Foundation import Images import Account +import Parser extension AppDelegate: FaviconDownloaderDelegate, FeedIconDownloaderDelegate { @@ -16,7 +17,7 @@ extension AppDelegate: FaviconDownloaderDelegate, FeedIconDownloaderDelegate { IconImage.appIcon } - func downloadMetadata(_ url: String) async throws -> RSHTMLMetadata? { + func downloadMetadata(_ url: String) async throws -> HTMLMetadata? 
{ await HTMLMetadataDownloader.downloadMetadata(for: url) } diff --git a/Shared/Extensions/ArticleStringFormatter.swift b/Shared/Extensions/ArticleStringFormatter.swift index a0d0c11e7..4d0484d08 100644 --- a/Shared/Extensions/ArticleStringFormatter.swift +++ b/Shared/Extensions/ArticleStringFormatter.swift @@ -66,7 +66,7 @@ import Parser s = s.replacingOccurrences(of: "\t", with: "") if !forHTML { - s = s.rsparser_stringByDecodingHTMLEntities() + s = HTMLEntityDecoder.decodedString(s) } s = s.trimmingWhitespace @@ -98,8 +98,9 @@ import Parser if let cachedBody = summaryCache[key] { return cachedBody } - var s = body.rsparser_stringByDecodingHTMLEntities() + var s = body s = s.strippingHTML(maxCharacters: 250) + s = HTMLEntityDecoder.decodedString(s) s = s.trimmingWhitespace s = s.collapsingWhitespace if s == "Comments" { // Hacker News. diff --git a/Shared/Extensions/NSAttributedString+NetNewsWire.swift b/Shared/Extensions/NSAttributedString+NetNewsWire.swift index d5b29c347..3e852d694 100644 --- a/Shared/Extensions/NSAttributedString+NetNewsWire.swift +++ b/Shared/Extensions/NSAttributedString+NetNewsWire.swift @@ -289,6 +289,6 @@ private struct CountedSet where Element: Hashable { private extension String { var decodedEntity: String { // It's possible the implementation will change, but for now it just calls this. - (self as NSString).rsparser_stringByDecodingHTMLEntities() as String + HTMLEntityDecoder.decodedString(self) } } diff --git a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift index 87268b3cd..ce6b88176 100644 --- a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift +++ b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift @@ -10,11 +10,11 @@ import Foundation import Web import Parser -extension RSHTMLMetadata: @unchecked Sendable {} +extension HTMLMetadata: @unchecked Sendable {} struct HTMLMetadataDownloader { - @MainActor static func downloadMetadata(for url: String) async -> RSHTMLMetadata? 
{ + @MainActor static func downloadMetadata(for url: String) async -> HTMLMetadata? { guard let actualURL = URL(string: url) else { return nil @@ -33,10 +33,10 @@ struct HTMLMetadataDownloader { return nil } - @MainActor private static func parseMetadata(with parserData: ParserData) async -> RSHTMLMetadata? { + @MainActor private static func parseMetadata(with parserData: ParserData) async -> HTMLMetadata? { - let task = Task.detached { () -> RSHTMLMetadata? in - RSHTMLMetadataParser.htmlMetadata(with: parserData) + let task = Task.detached { () -> HTMLMetadata? in + HTMLMetadataParser.metadata(with: parserData) } return await task.value