mirror of
https://github.com/Ranchero-Software/NetNewsWire.git
synced 2025-02-02 03:56:55 +01:00
Index articles using SQLite FTS.
This commit is contained in:
parent
44edbc7b09
commit
e9c76e8ac0
@ -38,6 +38,9 @@ public final class ArticlesDatabase {
|
||||
database.executeStatements("DROP TABLE if EXISTS tags;DROP INDEX if EXISTS tags_tagName_index;DROP INDEX if EXISTS articles_feedID_index;DROP INDEX if EXISTS statuses_read_index;")
|
||||
}
|
||||
queue.vacuumIfNeeded()
|
||||
DispatchQueue.main.async {
|
||||
self.articlesTable.indexUnindexedArticles()
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Fetching Articles
|
||||
|
@ -21,6 +21,7 @@
|
||||
845580761F0AF670003CCFA1 /* Article+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845580751F0AF670003CCFA1 /* Article+Database.swift */; };
|
||||
8455807A1F0AF67D003CCFA1 /* ArticleStatus+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845580791F0AF67D003CCFA1 /* ArticleStatus+Database.swift */; };
|
||||
8455807C1F0C0DBD003CCFA1 /* Attachment+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8455807B1F0C0DBD003CCFA1 /* Attachment+Database.swift */; };
|
||||
8477ACBC2221E76F00DF7F37 /* SearchTable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8477ACBB2221E76F00DF7F37 /* SearchTable.swift */; };
|
||||
848AD2961F58A91E004FB0EC /* UnreadCountDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */; };
|
||||
848E3EB920FBCFD20004B7ED /* RSCore.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 848E3EB820FBCFD20004B7ED /* RSCore.framework */; };
|
||||
848E3EBB20FBCFD80004B7ED /* RSParser.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 848E3EBA20FBCFD80004B7ED /* RSParser.framework */; };
|
||||
@ -128,6 +129,7 @@
|
||||
845580791F0AF67D003CCFA1 /* ArticleStatus+Database.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = "ArticleStatus+Database.swift"; path = "Extensions/ArticleStatus+Database.swift"; sourceTree = "<group>"; };
|
||||
8455807B1F0C0DBD003CCFA1 /* Attachment+Database.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = "Attachment+Database.swift"; path = "Extensions/Attachment+Database.swift"; sourceTree = "<group>"; };
|
||||
8461461E1F0ABC7300870CB3 /* RSParser.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = RSParser.xcodeproj; path = ../RSParser/RSParser.xcodeproj; sourceTree = "<group>"; };
|
||||
8477ACBB2221E76F00DF7F37 /* SearchTable.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SearchTable.swift; sourceTree = "<group>"; };
|
||||
848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UnreadCountDictionary.swift; sourceTree = "<group>"; };
|
||||
848E3EB820FBCFD20004B7ED /* RSCore.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = RSCore.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
848E3EBA20FBCFD80004B7ED /* RSParser.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = RSParser.framework; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
@ -178,6 +180,7 @@
|
||||
848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */,
|
||||
845580661F0AEBCD003CCFA1 /* Constants.swift */,
|
||||
84E156EB1F0AB80E00F8CC05 /* ArticlesTable.swift */,
|
||||
8477ACBB2221E76F00DF7F37 /* SearchTable.swift */,
|
||||
843577151F744FC800F460AE /* DatabaseArticle.swift */,
|
||||
84E156ED1F0AB81400F8CC05 /* StatusesTable.swift */,
|
||||
84F20F8E1F180D8700D8E682 /* AuthorsTable.swift */,
|
||||
@ -503,6 +506,7 @@
|
||||
840405CF1F1A963700DF0296 /* AttachmentsTable.swift in Sources */,
|
||||
84F20F8F1F180D8700D8E682 /* AuthorsTable.swift in Sources */,
|
||||
84288A001F6A3C4400395871 /* DatabaseObject+Database.swift in Sources */,
|
||||
8477ACBC2221E76F00DF7F37 /* SearchTable.swift in Sources */,
|
||||
843577161F744FC800F460AE /* DatabaseArticle.swift in Sources */,
|
||||
843702C31F70D15D00B18807 /* ParsedArticle+Database.swift in Sources */,
|
||||
84E156EC1F0AB80E00F8CC05 /* ArticlesTable.swift in Sources */,
|
||||
|
@ -21,6 +21,10 @@ final class ArticlesTable: DatabaseTable {
|
||||
private let authorsLookupTable: DatabaseLookupTable
|
||||
private let attachmentsLookupTable: DatabaseLookupTable
|
||||
|
||||
/// Full-text search table sharing this table's database queue.
/// Declared lazy because its initializer needs a reference to `self`.
private lazy var searchTable: SearchTable = SearchTable(queue: queue, articlesTable: self)
|
||||
|
||||
// TODO: update articleCutoffDate as time passes and based on user preferences.
|
||||
private var articleCutoffDate = NSDate.rs_dateWithNumberOfDays(inThePast: 3 * 31)!
|
||||
private var maximumArticleCutoffDate = NSDate.rs_dateWithNumberOfDays(inThePast: 4 * 31)!
|
||||
@ -31,7 +35,7 @@ final class ArticlesTable: DatabaseTable {
|
||||
self.accountID = accountID
|
||||
self.queue = queue
|
||||
self.statusesTable = StatusesTable(queue: queue)
|
||||
|
||||
|
||||
let authorsTable = AuthorsTable(name: DatabaseTableName.authors)
|
||||
self.authorsLookupTable = DatabaseLookupTable(name: DatabaseTableName.authorsLookup, objectIDKey: DatabaseKey.articleID, relatedObjectIDKey: DatabaseKey.authorID, relatedTable: authorsTable, relationshipName: RelationshipName.authors)
|
||||
|
||||
@ -88,6 +92,31 @@ final class ArticlesTable: DatabaseTable {
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches the fields needed for search indexing for the given articles.
///
/// - Parameters:
///   - articleIDs: IDs of the articles to look up.
///   - database: The database to query.
/// - Returns: One `ArticleSearchInfo` per usable row, or `nil` when the
///   placeholder list could not be built or the query could not be executed.
func fetchArticleSearchInfos(_ articleIDs: Set<String>, in database: FMDatabase) -> Set<ArticleSearchInfo>? {
	// Bail out instead of crashing if the placeholder list can't be created.
	guard let placeholders = NSString.rs_SQLValueList(withPlaceholders: UInt(articleIDs.count)) else {
		return nil
	}
	let parameters = articleIDs.map { $0 as AnyObject }
	let sql = "select articleID, title, contentHTML, contentText, summary, searchRowID from articles where articleID in \(placeholders);"

	guard let resultSet = database.executeQuery(sql, withArgumentsIn: parameters) else {
		return nil
	}

	return resultSet.mapToSet { (row) -> ArticleSearchInfo? in
		// A row without an articleID can't be indexed. Returning nil presumably
		// makes mapToSet skip the row (the closure's return type is optional).
		guard let articleID = row.string(forColumn: DatabaseKey.articleID) else {
			return nil
		}
		let title = row.string(forColumn: DatabaseKey.title)
		let contentHTML = row.string(forColumn: DatabaseKey.contentHTML)
		let contentText = row.string(forColumn: DatabaseKey.contentText)
		let summary = row.string(forColumn: DatabaseKey.summary)

		// searchRowID stays nil when the column is absent or SQL NULL,
		// which marks the article as not yet indexed.
		var searchRowID: Int? = nil
		if let searchRowIDObject = row.object(forColumnName: DatabaseKey.searchRowID), !(searchRowIDObject is NSNull) {
			searchRowID = Int(row.longLongInt(forColumn: DatabaseKey.searchRowID))
		}

		return ArticleSearchInfo(articleID: articleID, title: title, contentHTML: contentHTML, contentText: contentText, summary: summary, searchRowID: searchRowID)
	}
}
|
||||
|
||||
// MARK: Updating
|
||||
|
||||
func update(_ feedID: String, _ parsedFeed: ParsedFeed, _ completion: @escaping UpdateArticlesWithFeedCompletionBlock) {
|
||||
@ -104,7 +133,8 @@ final class ArticlesTable: DatabaseTable {
|
||||
// 5. Create array of Articles not in database and save them.
|
||||
// 6. Create array of updated Articles and save what’s changed.
|
||||
// 7. Call back with new and updated Articles.
|
||||
|
||||
// 8. Update search index.
|
||||
|
||||
let articleIDs = Set(parsedFeed.items.map { $0.articleID })
|
||||
|
||||
self.queue.update { (database) in
|
||||
@ -131,6 +161,22 @@ final class ArticlesTable: DatabaseTable {
|
||||
let updatedArticles = self.findAndSaveUpdatedArticles(incomingArticles, fetchedArticlesDictionary, database) //6
|
||||
|
||||
self.callUpdateArticlesCompletionBlock(newArticles, updatedArticles, completion) //7
|
||||
|
||||
// 8. Update search index.
|
||||
var articlesToIndex = Set<Article>()
|
||||
if let newArticles = newArticles {
|
||||
articlesToIndex.formUnion(newArticles)
|
||||
}
|
||||
if let updatedArticles = updatedArticles {
|
||||
articlesToIndex.formUnion(updatedArticles)
|
||||
}
|
||||
let articleIDs = articlesToIndex.articleIDs()
|
||||
if articleIDs.isEmpty {
|
||||
return
|
||||
}
|
||||
DispatchQueue.main.async() {
|
||||
self.searchTable.ensureIndexedArticles(for: articleIDs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -247,6 +293,26 @@ final class ArticlesTable: DatabaseTable {
|
||||
|
||||
return statusesTable.markEverywhereAsRead()
|
||||
}
|
||||
|
||||
// MARK: Indexing
|
||||
|
||||
/// Indexes articles that have no search row yet, one batch at a time.
///
/// Each pass selects up to 500 article IDs whose `searchRowID` is null,
/// hands them to the search table, and then schedules another pass on the
/// main queue. The chain stops when the query fails or returns no IDs.
func indexUnindexedArticles() {
	queue.fetch { (database) in
		let sql = "select articleID from articles where searchRowID is null limit 500;"
		guard let resultSet = database.executeQuery(sql, withArgumentsIn: nil) else {
			return
		}

		let unindexedArticleIDs = resultSet.mapToSet { $0.string(forColumn: DatabaseKey.articleID) }
		guard !unindexedArticleIDs.isEmpty else {
			// Nothing left to index, so don't schedule another pass.
			return
		}

		self.searchTable.ensureIndexedArticles(for: unindexedArticleIDs)

		// Queue the next batch.
		DispatchQueue.main.async {
			self.indexUnindexedArticles()
		}
	}
}
|
||||
}
|
||||
|
||||
// MARK: - Private
|
||||
|
@ -40,6 +40,7 @@ struct DatabaseKey {
|
||||
static let dateModified = "dateModified"
|
||||
static let authors = "authors"
|
||||
static let attachments = "attachments"
|
||||
static let searchRowID = "searchRowID"
|
||||
|
||||
// ArticleStatus
|
||||
static let read = "read"
|
||||
@ -61,6 +62,10 @@ struct DatabaseKey {
|
||||
static let name = "name"
|
||||
static let avatarURL = "avatarURL"
|
||||
static let emailAddress = "emailAddress"
|
||||
|
||||
// Search
|
||||
static let body = "body"
|
||||
static let rowID = "rowid"
|
||||
}
|
||||
|
||||
struct RelationshipName {
|
||||
|
@ -28,16 +28,17 @@ struct DatabaseArticle: Hashable {
|
||||
let datePublished: Date?
|
||||
let dateModified: Date?
|
||||
let status: ArticleStatus
|
||||
var hashValue: Int {
|
||||
return articleID.hashValue
|
||||
|
||||
// MARK: - Hashable
|
||||
|
||||
public func hash(into hasher: inout Hasher) {
|
||||
hasher.combine(articleID)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extension Set where Element == DatabaseArticle {

	/// Collects the `articleID` of every article in the set.
	func articleIDs() -> Set<String> {
		var ids = Set<String>()
		for article in self {
			ids.insert(article.articleID)
		}
		return ids
	}
}
|
||||
|
193
Frameworks/ArticlesDatabase/SearchTable.swift
Normal file
193
Frameworks/ArticlesDatabase/SearchTable.swift
Normal file
@ -0,0 +1,193 @@
|
||||
//
|
||||
// SearchTable.swift
|
||||
// ArticlesDatabase
|
||||
//
|
||||
// Created by Brent Simmons on 2/23/19.
|
||||
// Copyright © 2019 Ranchero Software. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import RSCore
|
||||
import RSDatabase
|
||||
import Articles
|
||||
import RSParser
|
||||
|
||||
/// The subset of an article's fields used to build or refresh its search-index row.
///
/// Hashing uses only `articleID`; equality compares every stored field.
final class ArticleSearchInfo: Hashable {

	let articleID: String
	let title: String?
	let contentHTML: String?
	let contentText: String?
	let summary: String?
	/// Row ID stored in the article's `searchRowID` column; nil when none is recorded.
	let searchRowID: Int?

	/// The first non-empty value among `contentHTML` and `contentText`,
	/// falling back to `summary` (or an empty string when that is nil).
	var preferredText: String {
		for case let text? in [contentHTML, contentText] where !text.isEmpty {
			return text
		}
		return summary ?? ""
	}

	/// `preferredText` with HTML entities decoded, HTML stripped, and whitespace collapsed.
	/// Computed once on first access.
	lazy var bodyForIndex: String = {
		let decoded = preferredText.rsparser_stringByDecodingHTMLEntities()
		let stripped = decoded.rs_string(byStrippingHTML: 0)
		return stripped.rs_stringWithCollapsedWhitespace()
	}()

	init(articleID: String, title: String?, contentHTML: String?, contentText: String?, summary: String?, searchRowID: Int?) {
		self.articleID = articleID
		self.title = title
		self.contentHTML = contentHTML
		self.contentText = contentText
		self.summary = summary
		self.searchRowID = searchRowID
	}

	// MARK: Hashable

	public func hash(into hasher: inout Hasher) {
		hasher.combine(articleID)
	}

	// MARK: Equatable

	static func == (lhs: ArticleSearchInfo, rhs: ArticleSearchInfo) -> Bool {
		guard lhs.articleID == rhs.articleID, lhs.title == rhs.title else {
			return false
		}
		guard lhs.contentHTML == rhs.contentHTML, lhs.contentText == rhs.contentText else {
			return false
		}
		return lhs.summary == rhs.summary && lhs.searchRowID == rhs.searchRowID
	}
}
|
||||
|
||||
/// Database table named "search" that holds the indexed title and body of articles.
final class SearchTable: DatabaseTable {

	let name = "search"
	private let queue: RSDatabaseQueue
	/// Held weakly; ArticlesTable keeps this table in a stored property.
	private weak var articlesTable: ArticlesTable?

	init(queue: RSDatabaseQueue, articlesTable: ArticlesTable) {
		self.queue = queue
		self.articlesTable = articlesTable
	}

	/// Makes sure each of the given articles has an up-to-date row in the search table.
	/// Does nothing when `articleIDs` is empty; otherwise the work runs in a
	/// `queue.update` block.
	func ensureIndexedArticles(for articleIDs: Set<String>) {
		guard !articleIDs.isEmpty else {
			return
		}
		queue.update { (database) in
			self.ensureIndexedArticles(articleIDs, database)
		}
	}
}
|
||||
|
||||
// MARK: - Private
|
||||
|
||||
private extension SearchTable {

	/// Fetches search info for the given articles, indexes the ones that have no
	/// `searchRowID` yet, then refreshes the index rows of the ones that do.
	func ensureIndexedArticles(_ articleIDs: Set<String>, _ database: FMDatabase) {
		guard let articlesTable = articlesTable,
			let fetchedInfos = articlesTable.fetchArticleSearchInfos(articleIDs, in: database) else {
			return
		}

		let needingInitialIndex = fetchedInfos.filter { $0.searchRowID == nil }
		performInitialIndexForArticles(needingInitialIndex, database)

		let alreadyIndexed = fetchedInfos.filter { $0.searchRowID != nil }
		updateIndexForArticles(alreadyIndexed, database)
	}

	/// Runs `performInitialIndex` for every article in the set.
	func performInitialIndexForArticles(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) {
		for article in articles {
			performInitialIndex(article, database)
		}
	}

	/// Inserts a search row for the article and stores the new row's ID in the
	/// article's `searchRowID` column.
	func performInitialIndex(_ article: ArticleSearchInfo, _ database: FMDatabase) {
		let newSearchRowID = insert(article, database)
		articlesTable?.updateRowsWithValue(newSearchRowID, valueKey: DatabaseKey.searchRowID, whereKey: DatabaseKey.articleID, matches: [article.articleID], database: database)
	}

	/// Inserts the article's title (empty string when nil) and indexable body into
	/// the search table.
	/// - Returns: The database's last insert row ID after the insert.
	func insert(_ article: ArticleSearchInfo, _ database: FMDatabase) -> Int {
		let titleToIndex = article.title ?? ""
		let bodyToIndex = article.bodyForIndex

		let rowDictionary = NSMutableDictionary()
		rowDictionary.setObject(titleToIndex, forKey: DatabaseKey.title as NSString)
		rowDictionary.setObject(bodyToIndex, forKey: DatabaseKey.body as NSString)
		insertRow(rowDictionary, insertType: .normal, in: database)

		return Int(database.lastInsertRowId())
	}

	/// A row read back from the search table. Hashing uses only `rowID`.
	private struct SearchInfo: Hashable {

		let rowID: Int
		let title: String
		let body: String

		/// Reads rowid, title, and body from the current row; missing strings become "".
		init(row: FMResultSet) {
			self.rowID = Int(row.longLongInt(forColumn: DatabaseKey.rowID))
			self.title = row.string(forColumn: DatabaseKey.title) ?? ""
			self.body = row.string(forColumn: DatabaseKey.body) ?? ""
		}

		// MARK: Hashable

		public func hash(into hasher: inout Hasher) {
			hasher.combine(rowID)
		}
	}

	/// Brings the search rows of already-indexed articles up to date.
	func updateIndexForArticles(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) {
		guard !articles.isEmpty else {
			return
		}

		guard let searchInfos = fetchSearchInfos(articles, database) else {
			// The articles that get here have a non-nil searchRowID, and we should have found rows in the search table for them.
			// But we didn’t. Recover by doing an initial index.
			performInitialIndexForArticles(articles, database)
			return
		}

		// Key the fetched rows by rowID, keeping the first one seen for any given ID.
		let searchInfosByRowID = Dictionary(searchInfos.map { ($0.rowID, $0) }, uniquingKeysWith: { (first, _) in first })

		for article in articles {
			updateIndexForArticle(article, searchInfosByRowID, database)
		}
	}

	/// Updates the title and/or body columns of the article's search row when they
	/// differ from the article's current values.
	private func updateIndexForArticle(_ article: ArticleSearchInfo, _ searchInfosDictionary: [Int: SearchInfo], _ database: FMDatabase) {
		guard let searchRowID = article.searchRowID else {
			assertionFailure("Expected article.searchRowID, got nil")
			return
		}
		guard let existingRow: SearchInfo = searchInfosDictionary[searchRowID] else {
			// Shouldn’t happen. The article has a searchRowID, but we didn’t find that row in the search table.
			// Easy to recover from: just do an initial index, and all’s well.
			performInitialIndex(article, database)
			return
		}

		let currentTitle = article.title ?? ""
		let currentBody = article.bodyForIndex
		let titleChanged = currentTitle != existingRow.title
		let bodyChanged = currentBody != existingRow.body

		guard titleChanged || bodyChanged else {
			return
		}

		// Only write the columns that actually changed.
		let changes = NSMutableDictionary()
		if titleChanged {
			changes.setObject(currentTitle, forKey: DatabaseKey.title as NSString)
		}
		if bodyChanged {
			changes.setObject(currentBody, forKey: DatabaseKey.body as NSString)
		}
		updateRowsWithDictionary(changes, whereKey: DatabaseKey.rowID, matches: existingRow.rowID, database: database)
	}

	/// Loads the search rows whose rowid matches the `searchRowID` of the given articles.
	/// - Returns: The rows found, or `nil` when no article has a `searchRowID` or
	///   the query could not be executed.
	private func fetchSearchInfos(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) -> Set<SearchInfo>? {
		let searchRowIDs = articles.compactMap { $0.searchRowID }
		if searchRowIDs.isEmpty {
			return nil
		}

		let placeholders = NSString.rs_SQLValueList(withPlaceholders: UInt(searchRowIDs.count))!
		let sql = "select rowid, title, body from \(name) where rowid in \(placeholders);"
		if let resultSet = database.executeQuery(sql, withArgumentsIn: searchRowIDs) {
			return resultSet.mapToSet { (row) in SearchInfo(row: row) }
		}
		return nil
	}
}
|
Loading…
x
Reference in New Issue
Block a user