Add a search index table (SearchTable) to ArticlesDatabase.

What this patch does, file by file:
- ArticlesDatabase.swift: after queue.vacuumIfNeeded(), schedules
  articlesTable.indexUnindexedArticles() on the main queue.
- project.pbxproj: adds SearchTable.swift as a file reference, group member
  and Sources build file.
- ArticlesTable.swift: adds a lazy searchTable, fetchArticleSearchInfos(),
  step 8 of update() (index new and updated articles) and
  indexUnindexedArticles().
- Constants.swift: adds DatabaseKey.searchRowID, DatabaseKey.body and
  DatabaseKey.rowID.
- DatabaseArticle.swift: replaces hashValue with hash(into:).
- SearchTable.swift (new): ArticleSearchInfo and SearchTable.

NOTE(review): this text was rebuilt from a copy of the patch whose line
breaks, indentation and angle-bracketed tokens were lost. Indentation, the
position of blank context lines, and the generic arguments / "<group>"
values are reconstructed from usage -- confirm against the repository
before applying.
NOTE(review): the added lines of ArticlesTable.swift and SearchTable.swift
were revised (doc comments, no force unwrap of articleID, no shadowing of
articleIDs, Dictionary(_:uniquingKeysWith:)). Hunk headers were recomputed;
the "index" lines are carried over and no longer describe the revised
post-images of those two files.

diff --git a/Frameworks/ArticlesDatabase/ArticlesDatabase.swift b/Frameworks/ArticlesDatabase/ArticlesDatabase.swift
index 0b7163771..a5df2d103 100644
--- a/Frameworks/ArticlesDatabase/ArticlesDatabase.swift
+++ b/Frameworks/ArticlesDatabase/ArticlesDatabase.swift
@@ -38,6 +38,9 @@ public final class ArticlesDatabase {
 			database.executeStatements("DROP TABLE if EXISTS tags;DROP INDEX if EXISTS tags_tagName_index;DROP INDEX if EXISTS articles_feedID_index;DROP INDEX if EXISTS statuses_read_index;")
 		}
 		queue.vacuumIfNeeded()
+		DispatchQueue.main.async {
+			self.articlesTable.indexUnindexedArticles()
+		}
 	}
 
 	// MARK: - Fetching Articles
diff --git a/Frameworks/ArticlesDatabase/ArticlesDatabase.xcodeproj/project.pbxproj b/Frameworks/ArticlesDatabase/ArticlesDatabase.xcodeproj/project.pbxproj
index 81a53e9b0..cfdf2cf90 100644
--- a/Frameworks/ArticlesDatabase/ArticlesDatabase.xcodeproj/project.pbxproj
+++ b/Frameworks/ArticlesDatabase/ArticlesDatabase.xcodeproj/project.pbxproj
@@ -21,6 +21,7 @@
 		845580761F0AF670003CCFA1 /* Article+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845580751F0AF670003CCFA1 /* Article+Database.swift */; };
 		8455807A1F0AF67D003CCFA1 /* ArticleStatus+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845580791F0AF67D003CCFA1 /* ArticleStatus+Database.swift */; };
 		8455807C1F0C0DBD003CCFA1 /* Attachment+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8455807B1F0C0DBD003CCFA1 /* Attachment+Database.swift */; };
+		8477ACBC2221E76F00DF7F37 /* SearchTable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8477ACBB2221E76F00DF7F37 /* SearchTable.swift */; };
 		848AD2961F58A91E004FB0EC /* UnreadCountDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */; };
 		848E3EB920FBCFD20004B7ED /* RSCore.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 848E3EB820FBCFD20004B7ED /* RSCore.framework */; };
 		848E3EBB20FBCFD80004B7ED /* RSParser.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 848E3EBA20FBCFD80004B7ED /* RSParser.framework */; };
@@ -128,6 +129,7 @@
 		845580791F0AF67D003CCFA1 /* ArticleStatus+Database.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = "ArticleStatus+Database.swift"; path = "Extensions/ArticleStatus+Database.swift"; sourceTree = "<group>"; };
 		8455807B1F0C0DBD003CCFA1 /* Attachment+Database.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = "Attachment+Database.swift"; path = "Extensions/Attachment+Database.swift"; sourceTree = "<group>"; };
 		8461461E1F0ABC7300870CB3 /* RSParser.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = RSParser.xcodeproj; path = ../RSParser/RSParser.xcodeproj; sourceTree = "<group>"; };
+		8477ACBB2221E76F00DF7F37 /* SearchTable.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SearchTable.swift; sourceTree = "<group>"; };
 		848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UnreadCountDictionary.swift; sourceTree = "<group>"; };
 		848E3EB820FBCFD20004B7ED /* RSCore.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = RSCore.framework; sourceTree = BUILT_PRODUCTS_DIR; };
 		848E3EBA20FBCFD80004B7ED /* RSParser.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = RSParser.framework; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -178,6 +180,7 @@
 				848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */,
 				845580661F0AEBCD003CCFA1 /* Constants.swift */,
 				84E156EB1F0AB80E00F8CC05 /* ArticlesTable.swift */,
+				8477ACBB2221E76F00DF7F37 /* SearchTable.swift */,
 				843577151F744FC800F460AE /* DatabaseArticle.swift */,
 				84E156ED1F0AB81400F8CC05 /* StatusesTable.swift */,
 				84F20F8E1F180D8700D8E682 /* AuthorsTable.swift */,
@@ -503,6 +506,7 @@
 				840405CF1F1A963700DF0296 /* AttachmentsTable.swift in Sources */,
 				84F20F8F1F180D8700D8E682 /* AuthorsTable.swift in Sources */,
 				84288A001F6A3C4400395871 /* DatabaseObject+Database.swift in Sources */,
+				8477ACBC2221E76F00DF7F37 /* SearchTable.swift in Sources */,
 				843577161F744FC800F460AE /* DatabaseArticle.swift in Sources */,
 				843702C31F70D15D00B18807 /* ParsedArticle+Database.swift in Sources */,
 				84E156EC1F0AB80E00F8CC05 /* ArticlesTable.swift in Sources */,
diff --git a/Frameworks/ArticlesDatabase/ArticlesTable.swift b/Frameworks/ArticlesDatabase/ArticlesTable.swift
index 058c37513..42970bb5e 100644
--- a/Frameworks/ArticlesDatabase/ArticlesTable.swift
+++ b/Frameworks/ArticlesDatabase/ArticlesTable.swift
@@ -21,6 +21,11 @@ final class ArticlesTable: DatabaseTable {
 	private let authorsLookupTable: DatabaseLookupTable
 	private let attachmentsLookupTable: DatabaseLookupTable
 
+	/// Table backing the search index. Lazy so that `self` is available to pass to its initializer.
+	private lazy var searchTable: SearchTable = {
+		return SearchTable(queue: queue, articlesTable: self)
+	}()
+
 	// TODO: update articleCutoffDate as time passes and based on user preferences.
 	private var articleCutoffDate = NSDate.rs_dateWithNumberOfDays(inThePast: 3 * 31)!
 	private var maximumArticleCutoffDate = NSDate.rs_dateWithNumberOfDays(inThePast: 4 * 31)!
@@ -31,7 +36,7 @@ final class ArticlesTable: DatabaseTable {
 		self.accountID = accountID
 		self.queue = queue
 		self.statusesTable = StatusesTable(queue: queue)
-		
+
 		let authorsTable = AuthorsTable(name: DatabaseTableName.authors)
 		self.authorsLookupTable = DatabaseLookupTable(name: DatabaseTableName.authorsLookup, objectIDKey: DatabaseKey.articleID, relatedObjectIDKey: DatabaseKey.authorID, relatedTable: authorsTable, relationshipName: RelationshipName.authors)
 
@@ -88,6 +93,41 @@ final class ArticlesTable: DatabaseTable {
 		}
 	}
 
+	/// Fetches the indexable fields and the current search row ID of the given articles.
+	///
+	/// - Parameters:
+	///   - articleIDs: IDs of the articles to select from the `articles` table.
+	///   - database: The database the query is executed on.
+	/// - Returns: The set of `ArticleSearchInfo` built from the matching rows, or `nil` if the query could not be executed.
+	func fetchArticleSearchInfos(_ articleIDs: Set<String>, in database: FMDatabase) -> Set<ArticleSearchInfo>? {
+		let parameters = articleIDs.map { $0 as AnyObject }
+		let placeholders = NSString.rs_SQLValueList(withPlaceholders: UInt(articleIDs.count))!
+		let sql = "select articleID, title, contentHTML, contentText, summary, searchRowID from articles where articleID in \(placeholders);"
+
+		guard let resultSet = database.executeQuery(sql, withArgumentsIn: parameters) else {
+			return nil
+		}
+		return resultSet.mapToSet { (row) -> ArticleSearchInfo? in
+			// Yield nil for a row without an articleID instead of force-unwrapping it.
+			guard let articleID = row.string(forColumn: DatabaseKey.articleID) else {
+				return nil
+			}
+			let title = row.string(forColumn: DatabaseKey.title)
+			let contentHTML = row.string(forColumn: DatabaseKey.contentHTML)
+			let contentText = row.string(forColumn: DatabaseKey.contentText)
+			let summary = row.string(forColumn: DatabaseKey.summary)
+
+			// searchRowID stays nil when the column value is nil or NSNull.
+			let searchRowIDObject = row.object(forColumnName: DatabaseKey.searchRowID)
+			var searchRowID: Int? = nil
+			if searchRowIDObject != nil && !(searchRowIDObject is NSNull) {
+				searchRowID = Int(row.longLongInt(forColumn: DatabaseKey.searchRowID))
+			}
+
+			return ArticleSearchInfo(articleID: articleID, title: title, contentHTML: contentHTML, contentText: contentText, summary: summary, searchRowID: searchRowID)
+		}
+	}
+
 	// MARK: Updating
 
 	func update(_ feedID: String, _ parsedFeed: ParsedFeed, _ completion: @escaping UpdateArticlesWithFeedCompletionBlock) {
@@ -104,7 +144,8 @@ final class ArticlesTable: DatabaseTable {
 		// 5. Create array of Articles not in database and save them.
 		// 6. Create array of updated Articles and save what’s changed.
 		// 7. Call back with new and updated Articles.
-		
+		// 8. Update search index.
+
 		let articleIDs = Set(parsedFeed.items.map { $0.articleID })
 
 		self.queue.update { (database) in
@@ -131,6 +172,22 @@ final class ArticlesTable: DatabaseTable {
 			let updatedArticles = self.findAndSaveUpdatedArticles(incomingArticles, fetchedArticlesDictionary, database) //6
 
 			self.callUpdateArticlesCompletionBlock(newArticles, updatedArticles, completion) //7
+
+			// 8. Update search index.
+			var articlesToIndex = Set<Article>()
+			if let newArticles = newArticles {
+				articlesToIndex.formUnion(newArticles)
+			}
+			if let updatedArticles = updatedArticles {
+				articlesToIndex.formUnion(updatedArticles)
+			}
+			let articleIDsToIndex = articlesToIndex.articleIDs()
+			if articleIDsToIndex.isEmpty {
+				return
+			}
+			DispatchQueue.main.async {
+				self.searchTable.ensureIndexedArticles(for: articleIDsToIndex)
+			}
 		}
 	}
 
@@ -247,6 +304,29 @@ final class ArticlesTable: DatabaseTable {
 
 		return statusesTable.markEverywhereAsRead()
 	}
+
+	// MARK: Indexing
+
+	/// Indexes articles whose `searchRowID` is null, at most 500 per pass.
+	///
+	/// Each pass hands its batch to the search table and then schedules another pass on the main queue; the chain ends when a pass finds no article IDs or its query fails.
+	func indexUnindexedArticles() {
+		queue.fetch { (database) in
+			let sql = "select articleID from articles where searchRowID is null limit 500;"
+			guard let resultSet = database.executeQuery(sql, withArgumentsIn: nil) else {
+				return
+			}
+			let articleIDs = resultSet.mapToSet{ $0.string(forColumn: DatabaseKey.articleID) }
+			if articleIDs.isEmpty {
+				return
+			}
+			self.searchTable.ensureIndexedArticles(for: articleIDs)
+
+			DispatchQueue.main.async {
+				self.indexUnindexedArticles()
+			}
+		}
+	}
 }
 
 // MARK: - Private
diff --git a/Frameworks/ArticlesDatabase/Constants.swift b/Frameworks/ArticlesDatabase/Constants.swift
index 1bca77fbb..054f96919 100644
--- a/Frameworks/ArticlesDatabase/Constants.swift
+++ b/Frameworks/ArticlesDatabase/Constants.swift
@@ -40,6 +40,7 @@ struct DatabaseKey {
 	static let dateModified = "dateModified"
 	static let authors = "authors"
 	static let attachments = "attachments"
+	static let searchRowID = "searchRowID"
 
 	// ArticleStatus
 	static let read = "read"
@@ -61,6 +62,10 @@ struct DatabaseKey {
 	static let name = "name"
 	static let avatarURL = "avatarURL"
 	static let emailAddress = "emailAddress"
+
+	// Search
+	static let body = "body"
+	static let rowID = "rowid"
 }
 
 struct RelationshipName {
diff --git a/Frameworks/ArticlesDatabase/DatabaseArticle.swift b/Frameworks/ArticlesDatabase/DatabaseArticle.swift
index 7a8d22acf..597decbf9 100644
--- a/Frameworks/ArticlesDatabase/DatabaseArticle.swift
+++ b/Frameworks/ArticlesDatabase/DatabaseArticle.swift
@@ -28,16 +28,17 @@ struct DatabaseArticle: Hashable {
 	let datePublished: Date?
 	let dateModified: Date?
 	let status: ArticleStatus
-	var hashValue: Int {
-		return articleID.hashValue
+
+	// MARK: - Hashable
+
+	public func hash(into hasher: inout Hasher) {
+		hasher.combine(articleID)
 	}
 }
 
-
 extension Set where Element == DatabaseArticle {
 
 	func articleIDs() -> Set<String> {
-
 		return Set<String>(map { $0.articleID })
 	}
 }
diff --git a/Frameworks/ArticlesDatabase/SearchTable.swift b/Frameworks/ArticlesDatabase/SearchTable.swift
new file mode 100644
index 000000000..491f204be
--- /dev/null
+++ b/Frameworks/ArticlesDatabase/SearchTable.swift
@@ -0,0 +1,204 @@
+//
+// SearchTable.swift
+// ArticlesDatabase
+//
+// Created by Brent Simmons on 2/23/19.
+// Copyright © 2019 Ranchero Software. All rights reserved.
+//
+
+import Foundation
+import RSCore
+import RSDatabase
+import Articles
+import RSParser
+
+/// The fields of one article that feed the search index, plus the article's current search row ID.
+final class ArticleSearchInfo: Hashable {
+
+	let articleID: String
+	let title: String?
+	let contentHTML: String?
+	let contentText: String?
+	let summary: String?
+	let searchRowID: Int?
+
+	/// The first non-empty of `contentHTML` and `contentText`; otherwise `summary`, or "" when that is nil.
+	var preferredText: String {
+		if let body = contentHTML, !body.isEmpty {
+			return body
+		}
+		if let body = contentText, !body.isEmpty {
+			return body
+		}
+		return summary ?? ""
+	}
+
+	/// `preferredText` passed through the HTML-entity decoding, HTML-stripping and whitespace-collapsing helpers. Computed on first access.
+	lazy var bodyForIndex: String = {
+		let s = preferredText.rsparser_stringByDecodingHTMLEntities()
+		return s.rs_string(byStrippingHTML: 0).rs_stringWithCollapsedWhitespace()
+	}()
+
+	init(articleID: String, title: String?, contentHTML: String?, contentText: String?, summary: String?, searchRowID: Int?) {
+		self.articleID = articleID
+		self.title = title
+		self.contentHTML = contentHTML
+		self.contentText = contentText
+		self.summary = summary
+		self.searchRowID = searchRowID
+	}
+
+	// MARK: Hashable
+
+	public func hash(into hasher: inout Hasher) {
+		hasher.combine(articleID)
+	}
+
+	// MARK: Equatable
+
+	static func == (lhs: ArticleSearchInfo, rhs: ArticleSearchInfo) -> Bool {
+		return lhs.articleID == rhs.articleID && lhs.title == rhs.title && lhs.contentHTML == rhs.contentHTML && lhs.contentText == rhs.contentText && lhs.summary == rhs.summary && lhs.searchRowID == rhs.searchRowID
+	}
+}
+
+/// Maintains rows in the `search` table for articles and records each row's ID on the article.
+final class SearchTable: DatabaseTable {
+
+	let name = "search"
+	private let queue: RSDatabaseQueue
+	private weak var articlesTable: ArticlesTable?
+
+	init(queue: RSDatabaseQueue, articlesTable: ArticlesTable) {
+		self.queue = queue
+		self.articlesTable = articlesTable
+	}
+
+	/// Inserts or updates the search rows for the given articles inside a `queue.update` block. Does nothing for an empty set.
+	func ensureIndexedArticles(for articleIDs: Set<String>) {
+		if articleIDs.isEmpty {
+			return
+		}
+		queue.update { (database) in
+			self.ensureIndexedArticles(articleIDs, database)
+		}
+	}
+}
+
+// MARK: - Private
+
+private extension SearchTable {
+
+	/// Indexes articles without a `searchRowID` for the first time and refreshes the rows of the others.
+	func ensureIndexedArticles(_ articleIDs: Set<String>, _ database: FMDatabase) {
+		guard let articlesTable = articlesTable else {
+			return
+		}
+		guard let articleSearchInfos = articlesTable.fetchArticleSearchInfos(articleIDs, in: database) else {
+			return
+		}
+
+		let unindexedArticles = articleSearchInfos.filter { $0.searchRowID == nil }
+		performInitialIndexForArticles(unindexedArticles, database)
+
+		let indexedArticles = articleSearchInfos.filter { $0.searchRowID != nil }
+		updateIndexForArticles(indexedArticles, database)
+	}
+
+	func performInitialIndexForArticles(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) {
+		articles.forEach { performInitialIndex($0, database) }
+	}
+
+	/// Inserts a search row for the article and stores the new row's ID in the article's `searchRowID` column.
+	func performInitialIndex(_ article: ArticleSearchInfo, _ database: FMDatabase) {
+		let rowid = insert(article, database)
+		articlesTable?.updateRowsWithValue(rowid, valueKey: DatabaseKey.searchRowID, whereKey: DatabaseKey.articleID, matches: [article.articleID], database: database)
+	}
+
+	/// Inserts the article's title and `bodyForIndex` and returns `database.lastInsertRowId()`.
+	func insert(_ article: ArticleSearchInfo, _ database: FMDatabase) -> Int {
+		let rowDictionary = NSMutableDictionary()
+		rowDictionary.setObject(article.title ?? "", forKey: DatabaseKey.title as NSString)
+		rowDictionary.setObject(article.bodyForIndex, forKey: DatabaseKey.body as NSString)
+		insertRow(rowDictionary, insertType: .normal, in: database)
+		return Int(database.lastInsertRowId())
+	}
+
+	/// The rowid, title and body read from one row of the search table.
+	private struct SearchInfo: Hashable {
+		let rowID: Int
+		let title: String
+		let body: String
+
+		init(row: FMResultSet) {
+			self.rowID = Int(row.longLongInt(forColumn: DatabaseKey.rowID))
+			self.title = row.string(forColumn: DatabaseKey.title) ?? ""
+			self.body = row.string(forColumn: DatabaseKey.body) ?? ""
+		}
+
+		// MARK: Hashable
+
+		public func hash(into hasher: inout Hasher) {
+			hasher.combine(rowID)
+		}
+	}
+
+	/// Rewrites the search rows of already-indexed articles whose title or body changed.
+	func updateIndexForArticles(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) {
+		if articles.isEmpty {
+			return
+		}
+		guard let searchInfos = fetchSearchInfos(articles, database) else {
+			// The articles that get here have a non-nil searchRowID, and we should have found rows in the search table for them.
+			// But we didn’t. Recover by doing an initial index.
+			performInitialIndexForArticles(articles, database)
+			return
+		}
+		// Key the fetched rows by rowID, keeping the first row seen for a key, so no force unwrap is needed.
+		let searchInfosDictionary = Dictionary(searchInfos.map { ($0.rowID, $0) }, uniquingKeysWith: { (first, _) in first })
+
+		articles.forEach { (article) in
+			updateIndexForArticle(article, searchInfosDictionary, database)
+		}
+	}
+
+	private func updateIndexForArticle(_ article: ArticleSearchInfo, _ searchInfosDictionary: [Int: SearchInfo], _ database: FMDatabase) {
+		guard let searchRowID = article.searchRowID else {
+			assertionFailure("Expected article.searchRowID, got nil")
+			return
+		}
+		guard let searchInfo: SearchInfo = searchInfosDictionary[searchRowID] else {
+			// Shouldn’t happen. The article has a searchRowID, but we didn’t find that row in the search table.
+			// Easy to recover from: just do an initial index, and all’s well.
+			performInitialIndex(article, database)
+			return
+		}
+
+		let title = article.title ?? ""
+		if title == searchInfo.title && article.bodyForIndex == searchInfo.body {
+			return
+		}
+
+		let updateDictionary = NSMutableDictionary()
+		if title != searchInfo.title {
+			updateDictionary.setObject(title, forKey: DatabaseKey.title as NSString)
+		}
+		if article.bodyForIndex != searchInfo.body {
+			updateDictionary.setObject(article.bodyForIndex, forKey: DatabaseKey.body as NSString)
+		}
+		updateRowsWithDictionary(updateDictionary, whereKey: DatabaseKey.rowID, matches: searchInfo.rowID, database: database)
+	}
+
+	/// Fetches the stored rows for the articles' `searchRowID`s; `nil` if there are no IDs or the query cannot be executed.
+	private func fetchSearchInfos(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) -> Set<SearchInfo>? {
+		let searchRowIDs = articles.compactMap { $0.searchRowID }
+		guard !searchRowIDs.isEmpty else {
+			return nil
+		}
+		let placeholders = NSString.rs_SQLValueList(withPlaceholders: UInt(searchRowIDs.count))!
+		let sql = "select rowid, title, body from \(name) where rowid in \(placeholders);"
+		guard let resultSet = database.executeQuery(sql, withArgumentsIn: searchRowIDs) else {
+			return nil
+		}
+		return resultSet.mapToSet { SearchInfo(row: $0) }
+	}
+}