Convert downloader to async await. Convert FeedFinder to async await.

This commit is contained in:
Brent Simmons 2024-06-07 22:28:24 -07:00
parent 3df22da7d9
commit 3a992d4340
8 changed files with 214 additions and 243 deletions

View File

@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Xcode scheme for the FeedFinder buildable.
It configures the Build, Test, Launch, Profile, Analyze, and Archive actions.
NOTE(review): this file appears to be tool-generated; Xcode may drop these
comments when it rewrites the scheme. Confirm before relying on them.
-->
<Scheme
LastUpgradeVersion = "1530"
version = "1.7">
<!-- Build: FeedFinder is built for testing, running, profiling, archiving, and analyzing. -->
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES"
buildArchitectures = "Automatic">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<!-- ReferencedContainer is "container:" with no path; presumably the enclosing package or project. TODO confirm. -->
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FeedFinder"
BuildableName = "FeedFinder"
BlueprintName = "FeedFinder"
ReferencedContainer = "container:">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<!-- Test: Debug configuration with the LLDB debugger and launcher; no testable targets are listed here. -->
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES"
shouldAutocreateTestPlan = "YES">
</TestAction>
<!-- Launch: Debug configuration with the LLDB debugger and launcher; no runnable product is listed here. -->
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
</LaunchAction>
<!-- Profile: Release configuration; macro expansion references the FeedFinder buildable. -->
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FeedFinder"
BuildableName = "FeedFinder"
BlueprintName = "FeedFinder"
ReferencedContainer = "container:">
</BuildableReference>
</MacroExpansion>
</ProfileAction>
<!-- Analyze: uses the Debug configuration. -->
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<!-- Archive: uses the Release configuration; revealArchiveInOrganizer is enabled. -->
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -19,74 +19,56 @@ import os.log
@MainActor public static func find(url: URL) async throws -> Set<FeedSpecifier> { @MainActor public static func find(url: URL) async throws -> Set<FeedSpecifier> {
try await withCheckedThrowingContinuation { continuation in var downloadData: DownloadData?
Task { @MainActor in
self.find(url: url) { result in do {
switch result { downloadData = try await DownloadWithCacheManager.shared.download(url)
case .success(let feedSpecifiers):
continuation.resume(returning: feedSpecifiers) } catch {
case .failure(let error): logger.error("FeedFinder: error for \(url) - \(error)")
continuation.resume(throwing: error) throw error
}
}
}
}
} }
@MainActor public static func find(url: URL, completion: @escaping @Sendable (Result<Set<FeedSpecifier>, Error>) -> Void) { guard let downloadData else {
downloadAddingToCache(url) { (data, response, error) in logger.error("FeedFinder: unexpectedly nil downloadData")
return Set<FeedSpecifier>()
}
MainActor.assumeIsolated { if downloadData.response?.forcedStatusCode == 404 {
if response?.forcedStatusCode == 404 {
logger.error("FeedFinder: 404 for \(url)")
if var urlComponents = URLComponents(url: url, resolvingAgainstBaseURL: false), urlComponents.host == "micro.blog" { if var urlComponents = URLComponents(url: url, resolvingAgainstBaseURL: false), urlComponents.host == "micro.blog" {
urlComponents.path = "\(urlComponents.path).json" urlComponents.path = "\(urlComponents.path).json"
if let newURLString = urlComponents.url?.absoluteString { if let newURLString = urlComponents.url?.absoluteString {
let microblogFeedSpecifier = FeedSpecifier(title: nil, urlString: newURLString, source: .HTMLLink, orderFound: 1) let microblogFeedSpecifier = FeedSpecifier(title: nil, urlString: newURLString, source: .HTMLLink, orderFound: 1)
completion(.success(Set([microblogFeedSpecifier]))) return Set([microblogFeedSpecifier])
} }
} else {
completion(.failure(AccountError.createErrorNotFound))
} }
return logger.error("FeedFinder: 404 for \(url)")
throw AccountError.createErrorNotFound
} }
if let error = error { guard let data = downloadData.data, !data.isEmpty, let response = downloadData.response else {
logger.error("FeedFinder: error for \(url) - \(error)")
completion(.failure(error))
return
}
guard let data, !data.isEmpty, let response else {
logger.error("FeedFinder: missing response and/or data for \(url)") logger.error("FeedFinder: missing response and/or data for \(url)")
completion(.failure(AccountError.createErrorNotFound)) throw AccountError.createErrorNotFound
return
} }
if !response.statusIsOK { if !response.statusIsOK {
logger.error("FeedFinder: non-OK response for \(url) - \(response.forcedStatusCode)") logger.error("FeedFinder: non-OK response for \(url) - \(response.forcedStatusCode)")
completion(.failure(AccountError.createErrorNotFound)) throw AccountError.createErrorNotFound
return
} }
if FeedFinder.isFeed(data, url.absoluteString) { if FeedFinder.isFeed(data, url.absoluteString) {
logger.info("FeedFinder: is feed \(url)") logger.info("FeedFinder: is feed \(url)")
let feedSpecifier = FeedSpecifier(title: nil, urlString: url.absoluteString, source: .UserEntered, orderFound: 1) let feedSpecifier = FeedSpecifier(title: nil, urlString: url.absoluteString, source: .UserEntered, orderFound: 1)
completion(.success(Set([feedSpecifier]))) return Set([feedSpecifier])
return
} }
if !FeedFinder.isHTML(data) { if !FeedFinder.isHTML(data) {
logger.error("FeedFinder: not feed and not HTML \(url)") logger.error("FeedFinder: not feed and not HTML \(url)")
completion(.failure(AccountError.createErrorNotFound)) throw AccountError.createErrorNotFound
return
} }
logger.info("FeedFinder: finding feeds in HTML \(url)") logger.info("FeedFinder: finding feeds in HTML \(url)")
FeedFinder.findFeedsInHTMLPage(htmlData: data, urlString: url.absoluteString, completion: completion) return try await findFeedsInHTMLPage(htmlData: data, urlString: url.absoluteString)
}
}
} }
} }
@ -104,7 +86,8 @@ private extension FeedFinder {
} }
} }
@MainActor static func findFeedsInHTMLPage(htmlData: Data, urlString: String, completion: @escaping (Result<Set<FeedSpecifier>, Error>) -> Void) { static func findFeedsInHTMLPage(htmlData: Data, urlString: String) async throws -> Set<FeedSpecifier> {
// Feeds in the <head> section we automatically assume are feeds. // Feeds in the <head> section we automatically assume are feeds.
// If there are none from the <head> section, // If there are none from the <head> section,
// then possible feeds in <body> section are downloaded individually // then possible feeds in <body> section are downloaded individually
@ -129,16 +112,13 @@ private extension FeedFinder {
} }
if didFindFeedInHTMLHead { if didFindFeedInHTMLHead {
completion(.success(Set(feedSpecifiers.values))) return Set(feedSpecifiers.values)
return
} }
else if feedSpecifiersToDownload.isEmpty { if feedSpecifiersToDownload.isEmpty {
completion(.failure(AccountError.createErrorNotFound)) throw AccountError.createErrorNotFound
return
}
else {
downloadFeedSpecifiers(feedSpecifiersToDownload, feedSpecifiers: feedSpecifiers, completion: completion)
} }
return await downloadFeedSpecifiers(feedSpecifiersToDownload, feedSpecifiers: feedSpecifiers)
} }
static func possibleFeedsInHTMLPage(htmlData: Data, urlString: String) -> Set<FeedSpecifier> { static func possibleFeedsInHTMLPage(htmlData: Data, urlString: String) -> Set<FeedSpecifier> {
@ -166,35 +146,25 @@ private extension FeedFinder {
return data.isProbablyHTML return data.isProbablyHTML
} }
@MainActor static func downloadFeedSpecifiers(_ downloadFeedSpecifiers: Set<FeedSpecifier>, feedSpecifiers: [String: FeedSpecifier], completion: @escaping (Result<Set<FeedSpecifier>, Error>) -> Void) { static func downloadFeedSpecifiers(_ downloadFeedSpecifiers: Set<FeedSpecifier>, feedSpecifiers: [String: FeedSpecifier]) async -> Set<FeedSpecifier> {
var resultFeedSpecifiers = feedSpecifiers var resultFeedSpecifiers = feedSpecifiers
let group = DispatchGroup()
for downloadFeedSpecifier in downloadFeedSpecifiers { for downloadFeedSpecifier in downloadFeedSpecifiers {
guard let url = URL(string: downloadFeedSpecifier.urlString) else { guard let url = URL(string: downloadFeedSpecifier.urlString) else {
continue continue
} }
group.enter() if let downloadData = try? await DownloadWithCacheManager.shared.download(url) {
if let data = downloadData.data, let response = downloadData.response, response.statusIsOK {
Task { @MainActor in if isFeed(data, downloadFeedSpecifier.urlString) {
downloadUsingCache(url) { (data, response, error) in
MainActor.assumeIsolated {
if let data = data, let response = response, response.statusIsOK, error == nil {
if self.isFeed(data, downloadFeedSpecifier.urlString) {
addFeedSpecifier(downloadFeedSpecifier, feedSpecifiers: &resultFeedSpecifiers) addFeedSpecifier(downloadFeedSpecifier, feedSpecifiers: &resultFeedSpecifiers)
} }
} }
group.leave()
}
}
} }
} }
group.notify(queue: DispatchQueue.main) { return Set(resultFeedSpecifiers.values)
completion(.success(Set(resultFeedSpecifiers.values)))
}
} }
static func isFeed(_ data: Data, _ urlString: String) -> Bool { static func isFeed(_ data: Data, _ urlString: String) -> Bool {

View File

@ -123,7 +123,7 @@ private extension SingleFaviconDownloader {
} }
do { do {
let downloadData = try await downloadUsingCache(url) let downloadData = try await DownloadWithCacheManager.shared.download(url)
let data = downloadData.data let data = downloadData.data
let response = downloadData.response let response = downloadData.response

View File

@ -88,7 +88,7 @@ private extension ImageDownloader {
} }
do { do {
let downloadData = try await downloadUsingCache(imageURL) let downloadData = try await DownloadWithCacheManager.shared.download(imageURL)
if let data = downloadData.data, !data.isEmpty, let response = downloadData.response, response.statusIsOK { if let data = downloadData.data, !data.isEmpty, let response = downloadData.response, response.statusIsOK {
try await saveToDisk(url, data) try await saveToDisk(url, data)

View File

@ -13,38 +13,21 @@ import Web
public struct InitialFeedDownloader { public struct InitialFeedDownloader {
@MainActor public static func download(_ url: URL) async -> ParsedFeed? { public static func download(_ url: URL) async -> ParsedFeed? {
await withCheckedContinuation { @MainActor continuation in guard let downloadData = try? await DownloadWithCacheManager.shared.download(url) else {
self.download(url) { parsedFeed in return nil
continuation.resume(returning: parsedFeed)
}
}
} }
@MainActor public static func download(_ url: URL,_ completion: @escaping @Sendable (_ parsedFeed: ParsedFeed?) -> Void) {
Task {
guard let downloadData = try? await downloadUsingCache(url) else {
completion(nil)
return
}
guard let data = downloadData.data else { guard let data = downloadData.data else {
completion(nil) return nil
return
} }
let parserData = ParserData(url: url.absoluteString, data: data) let parserData = ParserData(url: url.absoluteString, data: data)
Task.detached {
guard let parsedFeed = try? await FeedParser.parse(parserData) else { guard let parsedFeed = try? await FeedParser.parse(parserData) else {
completion(nil) return nil
return
} }
completion(parsedFeed) return parsedFeed
}
}
} }
} }

View File

@ -54,8 +54,8 @@ import CrashReporter
let formData = formString.data(using: .utf8, allowLossyConversion: true) let formData = formString.data(using: .utf8, allowLossyConversion: true)
request.httpBody = formData request.httpBody = formData
download(request) { (_, _, _) in Task { @MainActor in
// Don't care about the result. try? await OneShotDownloadManager.shared.download(request)
} }
} }

View File

@ -21,7 +21,7 @@ struct HTMLMetadataDownloader {
return nil return nil
} }
let downloadData = try? await downloadUsingCache(actualURL) let downloadData = try? await DownloadWithCacheManager.shared.download(actualURL)
let data = downloadData?.data let data = downloadData?.data
let response = downloadData?.response let response = downloadData?.response

View File

@ -7,23 +7,22 @@
// //
import Foundation import Foundation
import os
// Main thread only. public typealias DownloadData = (data: Data?, response: URLResponse?)
public typealias OneShotDownloadCallback = @Sendable (Data?, URLResponse?, Error?) -> Swift.Void public final class OneShotDownloadManager: Sendable {
@MainActor private final class OneShotDownloadManager {
public static let shared = OneShotDownloadManager()
private let urlSession: URLSession private let urlSession: URLSession
fileprivate static let shared = OneShotDownloadManager()
public init() { init() {
let sessionConfiguration = URLSessionConfiguration.ephemeral let sessionConfiguration = URLSessionConfiguration.ephemeral
sessionConfiguration.requestCachePolicy = .reloadIgnoringLocalCacheData sessionConfiguration.requestCachePolicy = .reloadIgnoringLocalCacheData
sessionConfiguration.httpShouldSetCookies = false sessionConfiguration.httpShouldSetCookies = false
sessionConfiguration.httpCookieAcceptPolicy = .never sessionConfiguration.httpCookieAcceptPolicy = .never
sessionConfiguration.httpMaximumConnectionsPerHost = 2 sessionConfiguration.httpMaximumConnectionsPerHost = 1
sessionConfiguration.httpCookieStorage = nil sessionConfiguration.httpCookieStorage = nil
sessionConfiguration.urlCache = nil sessionConfiguration.urlCache = nil
sessionConfiguration.timeoutIntervalForRequest = 30 sessionConfiguration.timeoutIntervalForRequest = 30
@ -36,16 +35,38 @@ public typealias OneShotDownloadCallback = @Sendable (Data?, URLResponse?, Error
urlSession.invalidateAndCancel() urlSession.invalidateAndCancel()
} }
public func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback) { func download(_ url: URL) async throws -> DownloadData {
let task = urlSession.dataTask(with: url) { (data, response, error) in
DispatchQueue.main.async() { try await withCheckedThrowingContinuation { continuation in
completion(data, response, error) download(url) { data, response, error in
if let error {
continuation.resume(throwing: error)
} else {
continuation.resume(returning: (data: data, response: response))
} }
} }
}
}
public func download(_ urlRequest: URLRequest) async throws -> DownloadData {
try await withCheckedThrowingContinuation { continuation in
download(urlRequest) { data, response, error in
if let error {
continuation.resume(throwing: error)
} else {
continuation.resume(returning: (data: data, response: response))
}
}
}
}
private func download(_ url: URL, _ completion: @escaping @Sendable (Data?, URLResponse?, (any Error)?) -> Void) {
let task = urlSession.dataTask(with: url, completionHandler: completion)
task.resume() task.resume()
} }
public func download(_ urlRequest: URLRequest, _ completion: @escaping OneShotDownloadCallback) { private func download(_ urlRequest: URLRequest, _ completion: @escaping @Sendable (Data?, URLResponse?, (any Error)?) -> Void) {
let task = urlSession.dataTask(with: urlRequest) { (data, response, error) in let task = urlSession.dataTask(with: urlRequest) { (data, response, error) in
DispatchQueue.main.async() { DispatchQueue.main.async() {
completion(data, response, error) completion(data, response, error)
@ -55,19 +76,6 @@ public typealias OneShotDownloadCallback = @Sendable (Data?, URLResponse?, Error
} }
} }
// Call one of these. It's easier than referring to OneShotDownloadManager.
// callback is called on the main queue.
@MainActor public func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
OneShotDownloadManager.shared.download(url, completion)
}
@MainActor public func download(_ urlRequest: URLRequest, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
OneShotDownloadManager.shared.download(urlRequest, completion)
}
// MARK: - Downloading using a cache // MARK: - Downloading using a cache
private struct WebCacheRecord { private struct WebCacheRecord {
@ -78,36 +86,42 @@ private struct WebCacheRecord {
let response: URLResponse let response: URLResponse
} }
private final class WebCache { private final class WebCache: Sendable {
private var cache = [URL: WebCacheRecord]() private let cache = OSAllocatedUnfairLock(initialState: [URL: WebCacheRecord]())
func cleanup(_ cleanupInterval: TimeInterval) { func cleanup(_ cleanupInterval: TimeInterval) {
cache.withLock { d in
let cutoffDate = Date(timeInterval: -cleanupInterval, since: Date()) let cutoffDate = Date(timeInterval: -cleanupInterval, since: Date())
for key in cache.keys { for key in d.keys {
let cacheRecord = self[key]! let cacheRecord = d[key]!
if shouldDelete(cacheRecord, cutoffDate) { if shouldDelete(cacheRecord, cutoffDate) {
cache[key] = nil d[key] = nil
}
} }
} }
} }
private func shouldDelete(_ cacheRecord: WebCacheRecord, _ cutoffDate: Date) -> Bool { private func shouldDelete(_ cacheRecord: WebCacheRecord, _ cutoffDate: Date) -> Bool {
return cacheRecord.dateDownloaded < cutoffDate cacheRecord.dateDownloaded < cutoffDate
} }
subscript(_ url: URL) -> WebCacheRecord? { subscript(_ url: URL) -> WebCacheRecord? {
get { get {
return cache[url] cache.withLock { d in
return d[url]
}
} }
set { set {
cache.withLock { d in
if let cacheRecord = newValue { if let cacheRecord = newValue {
cache[url] = cacheRecord d[url] = cacheRecord
} }
else { else {
cache[url] = nil d[url] = nil
}
} }
} }
} }
@ -116,24 +130,16 @@ private final class WebCache {
// URLSessionConfiguration has a cache policy. // URLSessionConfiguration has a cache policy.
// But we don't know how it works, and the unimplemented parts spook us a bit. // But we don't know how it works, and the unimplemented parts spook us a bit.
// So we use a cache that works exactly as we want it to work. // So we use a cache that works exactly as we want it to work.
// It also makes sure we don't have multiple requests for the same URL at the same time.
private struct CallbackRecord { public final actor DownloadWithCacheManager {
let url: URL
let completion: OneShotDownloadCallback
}
@MainActor private final class DownloadWithCacheManager { public static let shared = DownloadWithCacheManager()
private let cache = WebCache()
static let shared = DownloadWithCacheManager()
private var cache = WebCache()
private static let timeToLive: TimeInterval = 10 * 60 // 10 minutes private static let timeToLive: TimeInterval = 10 * 60 // 10 minutes
private static let cleanupInterval: TimeInterval = 5 * 60 // clean up the cache at most every 5 minutes private static let cleanupInterval: TimeInterval = 5 * 60 // clean up the cache at most every 5 minutes
private var lastCleanupDate = Date() private var lastCleanupDate = Date()
private var pendingCallbacks = [CallbackRecord]()
private var urlsInProgress = Set<URL>()
@MainActor func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback, forceRedownload: Bool = false) { public func download(_ url: URL, forceRedownload: Bool = false) async throws -> DownloadData {
if lastCleanupDate.timeIntervalSinceNow < -DownloadWithCacheManager.cleanupInterval { if lastCleanupDate.timeIntervalSinceNow < -DownloadWithCacheManager.cleanupInterval {
lastCleanupDate = Date() lastCleanupDate = Date()
@ -141,73 +147,18 @@ private struct CallbackRecord {
} }
if !forceRedownload { if !forceRedownload {
let cacheRecord: WebCacheRecord? = cache[url] if let cacheRecord = cache[url] {
if let cacheRecord = cacheRecord { return (cacheRecord.data, cacheRecord.response)
completion(cacheRecord.data, cacheRecord.response, nil)
return
} }
} }
let callbackRecord = CallbackRecord(url: url, completion: completion) let downloadData = try await OneShotDownloadManager.shared.download(url)
pendingCallbacks.append(callbackRecord)
if urlsInProgress.contains(url) {
return // The completion handler will get called later.
}
urlsInProgress.insert(url)
OneShotDownloadManager.shared.download(url) { (data, response, error) in if let data = downloadData.data, let response = downloadData.response, response.statusIsOK {
MainActor.assumeIsolated {
self.urlsInProgress.remove(url)
if let data = data, let response = response, response.statusIsOK, error == nil {
let cacheRecord = WebCacheRecord(url: url, dateDownloaded: Date(), data: data, response: response) let cacheRecord = WebCacheRecord(url: url, dateDownloaded: Date(), data: data, response: response)
self.cache[url] = cacheRecord cache[url] = cacheRecord
} }
var callbackCount = 0 return downloadData
for callbackRecord in self.pendingCallbacks {
if url == callbackRecord.url {
callbackRecord.completion(data, response, error)
callbackCount += 1
} }
} }
self.pendingCallbacks.removeAll(where: { (callbackRecord) -> Bool in
return callbackRecord.url == url
})
}
}
}
}
public struct DownloadData: Sendable {
public let data: Data?
public let response: URLResponse?
}
@MainActor public func downloadUsingCache(_ url: URL) async throws -> DownloadData {
precondition(Thread.isMainThread)
return try await withCheckedThrowingContinuation { continuation in
downloadUsingCache(url) { data, response, error in
if let error {
continuation.resume(throwing: error)
} else {
let downloadData = DownloadData(data: data, response: response)
continuation.resume(returning: downloadData)
}
}
}
}
@MainActor public func downloadUsingCache(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
DownloadWithCacheManager.shared.download(url, completion)
}
@MainActor public func downloadAddingToCache(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
DownloadWithCacheManager.shared.download(url, completion, forceRedownload: true)
}