Convert downloader to async await. Convert FeedFinder to async await.

This commit is contained in:
Brent Simmons 2024-06-07 22:28:24 -07:00
parent 3df22da7d9
commit 3a992d4340
8 changed files with 214 additions and 243 deletions

View File

@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1530"
version = "1.7">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES"
buildArchitectures = "Automatic">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FeedFinder"
BuildableName = "FeedFinder"
BlueprintName = "FeedFinder"
ReferencedContainer = "container:">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES"
shouldAutocreateTestPlan = "YES">
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FeedFinder"
BuildableName = "FeedFinder"
BlueprintName = "FeedFinder"
ReferencedContainer = "container:">
</BuildableReference>
</MacroExpansion>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -19,74 +19,56 @@ import os.log
@MainActor public static func find(url: URL) async throws -> Set<FeedSpecifier> {
try await withCheckedThrowingContinuation { continuation in
Task { @MainActor in
self.find(url: url) { result in
switch result {
case .success(let feedSpecifiers):
continuation.resume(returning: feedSpecifiers)
case .failure(let error):
continuation.resume(throwing: error)
}
var downloadData: DownloadData?
do {
downloadData = try await DownloadWithCacheManager.shared.download(url)
} catch {
logger.error("FeedFinder: error for \(url) - \(error)")
throw error
}
guard let downloadData else {
logger.error("FeedFinder: unexpectedly nil downloadData")
return Set<FeedSpecifier>()
}
if downloadData.response?.forcedStatusCode == 404 {
if var urlComponents = URLComponents(url: url, resolvingAgainstBaseURL: false), urlComponents.host == "micro.blog" {
urlComponents.path = "\(urlComponents.path).json"
if let newURLString = urlComponents.url?.absoluteString {
let microblogFeedSpecifier = FeedSpecifier(title: nil, urlString: newURLString, source: .HTMLLink, orderFound: 1)
return Set([microblogFeedSpecifier])
}
}
logger.error("FeedFinder: 404 for \(url)")
throw AccountError.createErrorNotFound
}
}
@MainActor public static func find(url: URL, completion: @escaping @Sendable (Result<Set<FeedSpecifier>, Error>) -> Void) {
downloadAddingToCache(url) { (data, response, error) in
MainActor.assumeIsolated {
if response?.forcedStatusCode == 404 {
logger.error("FeedFinder: 404 for \(url)")
if var urlComponents = URLComponents(url: url, resolvingAgainstBaseURL: false), urlComponents.host == "micro.blog" {
urlComponents.path = "\(urlComponents.path).json"
if let newURLString = urlComponents.url?.absoluteString {
let microblogFeedSpecifier = FeedSpecifier(title: nil, urlString: newURLString, source: .HTMLLink, orderFound: 1)
completion(.success(Set([microblogFeedSpecifier])))
}
} else {
completion(.failure(AccountError.createErrorNotFound))
}
return
}
if let error = error {
logger.error("FeedFinder: error for \(url) - \(error)")
completion(.failure(error))
return
}
guard let data, !data.isEmpty, let response else {
logger.error("FeedFinder: missing response and/or data for \(url)")
completion(.failure(AccountError.createErrorNotFound))
return
}
if !response.statusIsOK {
logger.error("FeedFinder: non-OK response for \(url) - \(response.forcedStatusCode)")
completion(.failure(AccountError.createErrorNotFound))
return
}
if FeedFinder.isFeed(data, url.absoluteString) {
logger.info("FeedFinder: is feed \(url)")
let feedSpecifier = FeedSpecifier(title: nil, urlString: url.absoluteString, source: .UserEntered, orderFound: 1)
completion(.success(Set([feedSpecifier])))
return
}
if !FeedFinder.isHTML(data) {
logger.error("FeedFinder: not feed and not HTML \(url)")
completion(.failure(AccountError.createErrorNotFound))
return
}
logger.info("FeedFinder: finding feeds in HTML \(url)")
FeedFinder.findFeedsInHTMLPage(htmlData: data, urlString: url.absoluteString, completion: completion)
}
guard let data = downloadData.data, !data.isEmpty, let response = downloadData.response else {
logger.error("FeedFinder: missing response and/or data for \(url)")
throw AccountError.createErrorNotFound
}
if !response.statusIsOK {
logger.error("FeedFinder: non-OK response for \(url) - \(response.forcedStatusCode)")
throw AccountError.createErrorNotFound
}
if FeedFinder.isFeed(data, url.absoluteString) {
logger.info("FeedFinder: is feed \(url)")
let feedSpecifier = FeedSpecifier(title: nil, urlString: url.absoluteString, source: .UserEntered, orderFound: 1)
return Set([feedSpecifier])
}
if !FeedFinder.isHTML(data) {
logger.error("FeedFinder: not feed and not HTML \(url)")
throw AccountError.createErrorNotFound
}
logger.info("FeedFinder: finding feeds in HTML \(url)")
return try await findFeedsInHTMLPage(htmlData: data, urlString: url.absoluteString)
}
}
@ -104,7 +86,8 @@ private extension FeedFinder {
}
}
@MainActor static func findFeedsInHTMLPage(htmlData: Data, urlString: String, completion: @escaping (Result<Set<FeedSpecifier>, Error>) -> Void) {
static func findFeedsInHTMLPage(htmlData: Data, urlString: String) async throws -> Set<FeedSpecifier> {
// Feeds in the <head> section we automatically assume are feeds.
// If there are none from the <head> section,
// then possible feeds in <body> section are downloaded individually
@ -129,16 +112,13 @@ private extension FeedFinder {
}
if didFindFeedInHTMLHead {
completion(.success(Set(feedSpecifiers.values)))
return
return Set(feedSpecifiers.values)
}
else if feedSpecifiersToDownload.isEmpty {
completion(.failure(AccountError.createErrorNotFound))
return
}
else {
downloadFeedSpecifiers(feedSpecifiersToDownload, feedSpecifiers: feedSpecifiers, completion: completion)
if feedSpecifiersToDownload.isEmpty {
throw AccountError.createErrorNotFound
}
return await downloadFeedSpecifiers(feedSpecifiersToDownload, feedSpecifiers: feedSpecifiers)
}
static func possibleFeedsInHTMLPage(htmlData: Data, urlString: String) -> Set<FeedSpecifier> {
@ -166,35 +146,25 @@ private extension FeedFinder {
return data.isProbablyHTML
}
@MainActor static func downloadFeedSpecifiers(_ downloadFeedSpecifiers: Set<FeedSpecifier>, feedSpecifiers: [String: FeedSpecifier], completion: @escaping (Result<Set<FeedSpecifier>, Error>) -> Void) {
static func downloadFeedSpecifiers(_ downloadFeedSpecifiers: Set<FeedSpecifier>, feedSpecifiers: [String: FeedSpecifier]) async -> Set<FeedSpecifier> {
var resultFeedSpecifiers = feedSpecifiers
let group = DispatchGroup()
for downloadFeedSpecifier in downloadFeedSpecifiers {
guard let url = URL(string: downloadFeedSpecifier.urlString) else {
continue
}
group.enter()
Task { @MainActor in
downloadUsingCache(url) { (data, response, error) in
MainActor.assumeIsolated {
if let data = data, let response = response, response.statusIsOK, error == nil {
if self.isFeed(data, downloadFeedSpecifier.urlString) {
addFeedSpecifier(downloadFeedSpecifier, feedSpecifiers: &resultFeedSpecifiers)
}
}
group.leave()
if let downloadData = try? await DownloadWithCacheManager.shared.download(url) {
if let data = downloadData.data, let response = downloadData.response, response.statusIsOK {
if isFeed(data, downloadFeedSpecifier.urlString) {
addFeedSpecifier(downloadFeedSpecifier, feedSpecifiers: &resultFeedSpecifiers)
}
}
}
}
group.notify(queue: DispatchQueue.main) {
completion(.success(Set(resultFeedSpecifiers.values)))
}
return Set(resultFeedSpecifiers.values)
}
static func isFeed(_ data: Data, _ urlString: String) -> Bool {

View File

@ -123,7 +123,7 @@ private extension SingleFaviconDownloader {
}
do {
let downloadData = try await downloadUsingCache(url)
let downloadData = try await DownloadWithCacheManager.shared.download(url)
let data = downloadData.data
let response = downloadData.response

View File

@ -88,7 +88,7 @@ private extension ImageDownloader {
}
do {
let downloadData = try await downloadUsingCache(imageURL)
let downloadData = try await DownloadWithCacheManager.shared.download(imageURL)
if let data = downloadData.data, !data.isEmpty, let response = downloadData.response, response.statusIsOK {
try await saveToDisk(url, data)

View File

@ -13,38 +13,21 @@ import Web
public struct InitialFeedDownloader {
@MainActor public static func download(_ url: URL) async -> ParsedFeed? {
public static func download(_ url: URL) async -> ParsedFeed? {
await withCheckedContinuation { @MainActor continuation in
self.download(url) { parsedFeed in
continuation.resume(returning: parsedFeed)
}
guard let downloadData = try? await DownloadWithCacheManager.shared.download(url) else {
return nil
}
}
@MainActor public static func download(_ url: URL,_ completion: @escaping @Sendable (_ parsedFeed: ParsedFeed?) -> Void) {
Task {
guard let downloadData = try? await downloadUsingCache(url) else {
completion(nil)
return
}
guard let data = downloadData.data else {
completion(nil)
return
}
let parserData = ParserData(url: url.absoluteString, data: data)
Task.detached {
guard let parsedFeed = try? await FeedParser.parse(parserData) else {
completion(nil)
return
}
completion(parsedFeed)
}
guard let data = downloadData.data else {
return nil
}
let parserData = ParserData(url: url.absoluteString, data: data)
guard let parsedFeed = try? await FeedParser.parse(parserData) else {
return nil
}
return parsedFeed
}
}

View File

@ -54,8 +54,8 @@ import CrashReporter
let formData = formString.data(using: .utf8, allowLossyConversion: true)
request.httpBody = formData
download(request) { (_, _, _) in
// Dont care about the result.
Task { @MainActor in
try? await OneShotDownloadManager.shared.download(request)
}
}

View File

@ -21,7 +21,7 @@ struct HTMLMetadataDownloader {
return nil
}
let downloadData = try? await downloadUsingCache(actualURL)
let downloadData = try? await DownloadWithCacheManager.shared.download(actualURL)
let data = downloadData?.data
let response = downloadData?.response

View File

@ -7,23 +7,22 @@
//
import Foundation
import os
// Main thread only.
public typealias DownloadData = (data: Data?, response: URLResponse?)
public typealias OneShotDownloadCallback = @Sendable (Data?, URLResponse?, Error?) -> Swift.Void
@MainActor private final class OneShotDownloadManager {
public final class OneShotDownloadManager: Sendable {
public static let shared = OneShotDownloadManager()
private let urlSession: URLSession
fileprivate static let shared = OneShotDownloadManager()
public init() {
init() {
let sessionConfiguration = URLSessionConfiguration.ephemeral
sessionConfiguration.requestCachePolicy = .reloadIgnoringLocalCacheData
sessionConfiguration.httpShouldSetCookies = false
sessionConfiguration.httpCookieAcceptPolicy = .never
sessionConfiguration.httpMaximumConnectionsPerHost = 2
sessionConfiguration.httpMaximumConnectionsPerHost = 1
sessionConfiguration.httpCookieStorage = nil
sessionConfiguration.urlCache = nil
sessionConfiguration.timeoutIntervalForRequest = 30
@ -36,16 +35,38 @@ public typealias OneShotDownloadCallback = @Sendable (Data?, URLResponse?, Error
urlSession.invalidateAndCancel()
}
public func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
let task = urlSession.dataTask(with: url) { (data, response, error) in
DispatchQueue.main.async() {
completion(data, response, error)
func download(_ url: URL) async throws -> DownloadData {
try await withCheckedThrowingContinuation { continuation in
download(url) { data, response, error in
if let error {
continuation.resume(throwing: error)
} else {
continuation.resume(returning: (data: data, response: response))
}
}
}
}
public func download(_ urlRequest: URLRequest) async throws -> DownloadData {
try await withCheckedThrowingContinuation { continuation in
download(urlRequest) { data, response, error in
if let error {
continuation.resume(throwing: error)
} else {
continuation.resume(returning: (data: data, response: response))
}
}
}
}
private func download(_ url: URL, _ completion: @escaping @Sendable (Data?, URLResponse?, (any Error)?) -> Void) {
let task = urlSession.dataTask(with: url, completionHandler: completion)
task.resume()
}
public func download(_ urlRequest: URLRequest, _ completion: @escaping OneShotDownloadCallback) {
private func download(_ urlRequest: URLRequest, _ completion: @escaping @Sendable (Data?, URLResponse?, (any Error)?) -> Void) {
let task = urlSession.dataTask(with: urlRequest) { (data, response, error) in
DispatchQueue.main.async() {
completion(data, response, error)
@ -55,19 +76,6 @@ public typealias OneShotDownloadCallback = @Sendable (Data?, URLResponse?, Error
}
}
// Call one of these. Its easier than referring to OneShotDownloadManager.
// callback is called on the main queue.
@MainActor public func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
OneShotDownloadManager.shared.download(url, completion)
}
@MainActor public func download(_ urlRequest: URLRequest, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
OneShotDownloadManager.shared.download(urlRequest, completion)
}
// MARK: - Downloading using a cache
private struct WebCacheRecord {
@ -78,37 +86,43 @@ private struct WebCacheRecord {
let response: URLResponse
}
private final class WebCache {
private var cache = [URL: WebCacheRecord]()
private final class WebCache: Sendable {
private let cache = OSAllocatedUnfairLock(initialState: [URL: WebCacheRecord]())
func cleanup(_ cleanupInterval: TimeInterval) {
let cutoffDate = Date(timeInterval: -cleanupInterval, since: Date())
for key in cache.keys {
let cacheRecord = self[key]!
if shouldDelete(cacheRecord, cutoffDate) {
cache[key] = nil
}
}
cache.withLock { d in
let cutoffDate = Date(timeInterval: -cleanupInterval, since: Date())
for key in d.keys {
let cacheRecord = d[key]!
if shouldDelete(cacheRecord, cutoffDate) {
d[key] = nil
}
}
}
}
private func shouldDelete(_ cacheRecord: WebCacheRecord, _ cutoffDate: Date) -> Bool {
return cacheRecord.dateDownloaded < cutoffDate
cacheRecord.dateDownloaded < cutoffDate
}
subscript(_ url: URL) -> WebCacheRecord? {
get {
return cache[url]
cache.withLock { d in
return d[url]
}
}
set {
if let cacheRecord = newValue {
cache[url] = cacheRecord
}
else {
cache[url] = nil
}
cache.withLock { d in
if let cacheRecord = newValue {
d[url] = cacheRecord
}
else {
d[url] = nil
}
}
}
}
}
@ -116,24 +130,16 @@ private final class WebCache {
// URLSessionConfiguration has a cache policy.
// But we dont know how it works, and the unimplemented parts spook us a bit.
// So we use a cache that works exactly as we want it to work.
// It also makes sure we dont have multiple requests for the same URL at the same time.
private struct CallbackRecord {
let url: URL
let completion: OneShotDownloadCallback
}
public final actor DownloadWithCacheManager {
@MainActor private final class DownloadWithCacheManager {
static let shared = DownloadWithCacheManager()
private var cache = WebCache()
public static let shared = DownloadWithCacheManager()
private let cache = WebCache()
private static let timeToLive: TimeInterval = 10 * 60 // 10 minutes
private static let cleanupInterval: TimeInterval = 5 * 60 // clean up the cache at most every 5 minutes
private var lastCleanupDate = Date()
private var pendingCallbacks = [CallbackRecord]()
private var urlsInProgress = Set<URL>()
@MainActor func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback, forceRedownload: Bool = false) {
public func download(_ url: URL, forceRedownload: Bool = false) async throws -> DownloadData {
if lastCleanupDate.timeIntervalSinceNow < -DownloadWithCacheManager.cleanupInterval {
lastCleanupDate = Date()
@ -141,73 +147,18 @@ private struct CallbackRecord {
}
if !forceRedownload {
let cacheRecord: WebCacheRecord? = cache[url]
if let cacheRecord = cacheRecord {
completion(cacheRecord.data, cacheRecord.response, nil)
return
if let cacheRecord = cache[url] {
return (cacheRecord.data, cacheRecord.response)
}
}
let callbackRecord = CallbackRecord(url: url, completion: completion)
pendingCallbacks.append(callbackRecord)
if urlsInProgress.contains(url) {
return // The completion handler will get called later.
let downloadData = try await OneShotDownloadManager.shared.download(url)
if let data = downloadData.data, let response = downloadData.response, response.statusIsOK {
let cacheRecord = WebCacheRecord(url: url, dateDownloaded: Date(), data: data, response: response)
cache[url] = cacheRecord
}
urlsInProgress.insert(url)
OneShotDownloadManager.shared.download(url) { (data, response, error) in
MainActor.assumeIsolated {
self.urlsInProgress.remove(url)
if let data = data, let response = response, response.statusIsOK, error == nil {
let cacheRecord = WebCacheRecord(url: url, dateDownloaded: Date(), data: data, response: response)
self.cache[url] = cacheRecord
}
var callbackCount = 0
for callbackRecord in self.pendingCallbacks {
if url == callbackRecord.url {
callbackRecord.completion(data, response, error)
callbackCount += 1
}
}
self.pendingCallbacks.removeAll(where: { (callbackRecord) -> Bool in
return callbackRecord.url == url
})
}
}
return downloadData
}
}
public struct DownloadData: Sendable {
public let data: Data?
public let response: URLResponse?
}
@MainActor public func downloadUsingCache(_ url: URL) async throws -> DownloadData {
precondition(Thread.isMainThread)
return try await withCheckedThrowingContinuation { continuation in
downloadUsingCache(url) { data, response, error in
if let error {
continuation.resume(throwing: error)
} else {
let downloadData = DownloadData(data: data, response: response)
continuation.resume(returning: downloadData)
}
}
}
}
@MainActor public func downloadUsingCache(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
DownloadWithCacheManager.shared.download(url, completion)
}
@MainActor public func downloadAddingToCache(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
DownloadWithCacheManager.shared.download(url, completion, forceRedownload: true)
}