Index articles using SQLite FTS.

This commit is contained in:
Brent Simmons 2019-02-24 15:34:10 -08:00
parent 44edbc7b09
commit e9c76e8ac0
6 changed files with 278 additions and 6 deletions

View File

@ -38,6 +38,9 @@ public final class ArticlesDatabase {
database.executeStatements("DROP TABLE if EXISTS tags;DROP INDEX if EXISTS tags_tagName_index;DROP INDEX if EXISTS articles_feedID_index;DROP INDEX if EXISTS statuses_read_index;")
}
queue.vacuumIfNeeded()
DispatchQueue.main.async {
self.articlesTable.indexUnindexedArticles()
}
}
// MARK: - Fetching Articles

View File

@ -21,6 +21,7 @@
845580761F0AF670003CCFA1 /* Article+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845580751F0AF670003CCFA1 /* Article+Database.swift */; };
8455807A1F0AF67D003CCFA1 /* ArticleStatus+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845580791F0AF67D003CCFA1 /* ArticleStatus+Database.swift */; };
8455807C1F0C0DBD003CCFA1 /* Attachment+Database.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8455807B1F0C0DBD003CCFA1 /* Attachment+Database.swift */; };
8477ACBC2221E76F00DF7F37 /* SearchTable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8477ACBB2221E76F00DF7F37 /* SearchTable.swift */; };
848AD2961F58A91E004FB0EC /* UnreadCountDictionary.swift in Sources */ = {isa = PBXBuildFile; fileRef = 848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */; };
848E3EB920FBCFD20004B7ED /* RSCore.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 848E3EB820FBCFD20004B7ED /* RSCore.framework */; };
848E3EBB20FBCFD80004B7ED /* RSParser.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 848E3EBA20FBCFD80004B7ED /* RSParser.framework */; };
@ -128,6 +129,7 @@
845580791F0AF67D003CCFA1 /* ArticleStatus+Database.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = "ArticleStatus+Database.swift"; path = "Extensions/ArticleStatus+Database.swift"; sourceTree = "<group>"; };
8455807B1F0C0DBD003CCFA1 /* Attachment+Database.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = "Attachment+Database.swift"; path = "Extensions/Attachment+Database.swift"; sourceTree = "<group>"; };
8461461E1F0ABC7300870CB3 /* RSParser.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = RSParser.xcodeproj; path = ../RSParser/RSParser.xcodeproj; sourceTree = "<group>"; };
8477ACBB2221E76F00DF7F37 /* SearchTable.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SearchTable.swift; sourceTree = "<group>"; };
848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = UnreadCountDictionary.swift; sourceTree = "<group>"; };
848E3EB820FBCFD20004B7ED /* RSCore.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = RSCore.framework; sourceTree = BUILT_PRODUCTS_DIR; };
848E3EBA20FBCFD80004B7ED /* RSParser.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = RSParser.framework; sourceTree = BUILT_PRODUCTS_DIR; };
@ -178,6 +180,7 @@
848AD2951F58A91E004FB0EC /* UnreadCountDictionary.swift */,
845580661F0AEBCD003CCFA1 /* Constants.swift */,
84E156EB1F0AB80E00F8CC05 /* ArticlesTable.swift */,
8477ACBB2221E76F00DF7F37 /* SearchTable.swift */,
843577151F744FC800F460AE /* DatabaseArticle.swift */,
84E156ED1F0AB81400F8CC05 /* StatusesTable.swift */,
84F20F8E1F180D8700D8E682 /* AuthorsTable.swift */,
@ -503,6 +506,7 @@
840405CF1F1A963700DF0296 /* AttachmentsTable.swift in Sources */,
84F20F8F1F180D8700D8E682 /* AuthorsTable.swift in Sources */,
84288A001F6A3C4400395871 /* DatabaseObject+Database.swift in Sources */,
8477ACBC2221E76F00DF7F37 /* SearchTable.swift in Sources */,
843577161F744FC800F460AE /* DatabaseArticle.swift in Sources */,
843702C31F70D15D00B18807 /* ParsedArticle+Database.swift in Sources */,
84E156EC1F0AB80E00F8CC05 /* ArticlesTable.swift in Sources */,

View File

@ -21,6 +21,10 @@ final class ArticlesTable: DatabaseTable {
private let authorsLookupTable: DatabaseLookupTable
private let attachmentsLookupTable: DatabaseLookupTable
private lazy var searchTable: SearchTable = {
return SearchTable(queue: queue, articlesTable: self)
}()
// TODO: update articleCutoffDate as time passes and based on user preferences.
private var articleCutoffDate = NSDate.rs_dateWithNumberOfDays(inThePast: 3 * 31)!
private var maximumArticleCutoffDate = NSDate.rs_dateWithNumberOfDays(inThePast: 4 * 31)!
@ -31,7 +35,7 @@ final class ArticlesTable: DatabaseTable {
self.accountID = accountID
self.queue = queue
self.statusesTable = StatusesTable(queue: queue)
let authorsTable = AuthorsTable(name: DatabaseTableName.authors)
self.authorsLookupTable = DatabaseLookupTable(name: DatabaseTableName.authorsLookup, objectIDKey: DatabaseKey.articleID, relatedObjectIDKey: DatabaseKey.authorID, relatedTable: authorsTable, relationshipName: RelationshipName.authors)
@ -88,6 +92,31 @@ final class ArticlesTable: DatabaseTable {
}
}
/// Fetches the columns needed for search indexing for the given articles.
///
/// - Parameters:
///   - articleIDs: IDs of the articles to look up in the `articles` table.
///   - database: The database to run the query against.
/// - Returns: One `ArticleSearchInfo` per fetched row whose `articleID` column could be read,
///   or `nil` when `articleIDs` is empty, the placeholder list can't be built, or the query fails.
func fetchArticleSearchInfos(_ articleIDs: Set<String>, in database: FMDatabase) -> Set<ArticleSearchInfo>? {
    // Nothing to fetch for an empty set, and the placeholder helper returns an optional:
    // bail out here instead of force-unwrapping it.
    guard !articleIDs.isEmpty, let placeholders = NSString.rs_SQLValueList(withPlaceholders: UInt(articleIDs.count)) else {
        return nil
    }
    let parameters = articleIDs.map { $0 as AnyObject }
    let sql = "select articleID, title, contentHTML, contentText, summary, searchRowID from articles where articleID in \(placeholders);"
    guard let resultSet = database.executeQuery(sql, withArgumentsIn: parameters) else {
        return nil
    }
    return resultSet.mapToSet { (row) -> ArticleSearchInfo? in
        // Skip a row whose articleID can't be read rather than crashing on a force-unwrap.
        guard let articleID = row.string(forColumn: DatabaseKey.articleID) else {
            return nil
        }
        let title = row.string(forColumn: DatabaseKey.title)
        let contentHTML = row.string(forColumn: DatabaseKey.contentHTML)
        let contentText = row.string(forColumn: DatabaseKey.contentText)
        let summary = row.string(forColumn: DatabaseKey.summary)

        // A missing value (nil or NSNull) means the article has no search row; keep that as nil.
        var searchRowID: Int? = nil
        if let searchRowIDObject = row.object(forColumnName: DatabaseKey.searchRowID), !(searchRowIDObject is NSNull) {
            searchRowID = Int(row.longLongInt(forColumn: DatabaseKey.searchRowID))
        }
        return ArticleSearchInfo(articleID: articleID, title: title, contentHTML: contentHTML, contentText: contentText, summary: summary, searchRowID: searchRowID)
    }
}
// MARK: Updating
func update(_ feedID: String, _ parsedFeed: ParsedFeed, _ completion: @escaping UpdateArticlesWithFeedCompletionBlock) {
@ -104,7 +133,8 @@ final class ArticlesTable: DatabaseTable {
// 5. Create array of Articles not in database and save them.
// 6. Create array of updated Articles and save what's changed.
// 7. Call back with new and updated Articles.
// 8. Update search index.
let articleIDs = Set(parsedFeed.items.map { $0.articleID })
self.queue.update { (database) in
@ -131,6 +161,22 @@ final class ArticlesTable: DatabaseTable {
let updatedArticles = self.findAndSaveUpdatedArticles(incomingArticles, fetchedArticlesDictionary, database) //6
self.callUpdateArticlesCompletionBlock(newArticles, updatedArticles, completion) //7
// 8. Update search index.
var articlesToIndex = Set<Article>()
if let newArticles = newArticles {
articlesToIndex.formUnion(newArticles)
}
if let updatedArticles = updatedArticles {
articlesToIndex.formUnion(updatedArticles)
}
let articleIDs = articlesToIndex.articleIDs()
if articleIDs.isEmpty {
return
}
DispatchQueue.main.async() {
self.searchTable.ensureIndexedArticles(for: articleIDs)
}
}
}
@ -247,6 +293,26 @@ final class ArticlesTable: DatabaseTable {
return statusesTable.markEverywhereAsRead()
}
// MARK: Indexing
/// Indexes, in batches, the articles that don't have a search row yet.
///
/// Each pass selects up to 500 article IDs whose `searchRowID` is null, hands them to the
/// search table, and then asks the main queue to start another pass. The passes stop once
/// the query fails or returns no article IDs.
func indexUnindexedArticles() {
    queue.fetch { (database) in
        let batchQuery = "select articleID from articles where searchRowID is null limit 500;"
        guard let rows = database.executeQuery(batchQuery, withArgumentsIn: nil) else {
            return
        }

        let pendingArticleIDs = rows.mapToSet { $0.string(forColumn: DatabaseKey.articleID) }
        guard !pendingArticleIDs.isEmpty else {
            return
        }

        self.searchTable.ensureIndexedArticles(for: pendingArticleIDs)

        // Go around again for the next batch.
        DispatchQueue.main.async {
            self.indexUnindexedArticles()
        }
    }
}
}
// MARK: - Private

View File

@ -40,6 +40,7 @@ struct DatabaseKey {
static let dateModified = "dateModified"
static let authors = "authors"
static let attachments = "attachments"
static let searchRowID = "searchRowID"
// ArticleStatus
static let read = "read"
@ -61,6 +62,10 @@ struct DatabaseKey {
static let name = "name"
static let avatarURL = "avatarURL"
static let emailAddress = "emailAddress"
// Search
static let body = "body"
static let rowID = "rowid"
}
struct RelationshipName {

View File

@ -28,16 +28,17 @@ struct DatabaseArticle: Hashable {
let datePublished: Date?
let dateModified: Date?
let status: ArticleStatus
var hashValue: Int {
return articleID.hashValue
// MARK: - Hashable
public func hash(into hasher: inout Hasher) {
hasher.combine(articleID)
}
}
extension Set where Element == DatabaseArticle {

    /// Returns the `articleID` of every article in the set.
    func articleIDs() -> Set<String> {
        var ids = Set<String>()
        for article in self {
            ids.insert(article.articleID)
        }
        return ids
    }
}

View File

@ -0,0 +1,193 @@
//
// SearchTable.swift
// ArticlesDatabase
//
// Created by Brent Simmons on 2/23/19.
// Copyright © 2019 Ranchero Software. All rights reserved.
//
import Foundation
import RSCore
import RSDatabase
import Articles
import RSParser
/// The article fields used to build or refresh an article's entry in the search table.
///
/// Hashing uses only `articleID`; equality compares every stored property.
final class ArticleSearchInfo: Hashable {

    let articleID: String
    let title: String?
    let contentHTML: String?
    let contentText: String?
    let summary: String?
    let searchRowID: Int?

    /// The first non-empty value among `contentHTML` and `contentText`;
    /// otherwise `summary`, or an empty string when `summary` is nil.
    var preferredText: String {
        for candidate in [contentHTML, contentText] {
            if let text = candidate, !text.isEmpty {
                return text
            }
        }
        return summary ?? ""
    }

    /// `preferredText` run through the entity-decoding, HTML-stripping, and
    /// whitespace-collapsing helpers, in that order. Computed on first access.
    ///
    /// NOTE(review): entities are decoded before tags are stripped, so escaped markup in the
    /// text (e.g. `&lt;b&gt;`) would presumably be stripped as if it were a tag — confirm this is intended.
    lazy var bodyForIndex: String = {
        let decodedText = preferredText.rsparser_stringByDecodingHTMLEntities()
        return decodedText.rs_string(byStrippingHTML: 0).rs_stringWithCollapsedWhitespace()
    }()

    init(articleID: String, title: String?, contentHTML: String?, contentText: String?, summary: String?, searchRowID: Int?) {
        self.articleID = articleID
        self.title = title
        self.contentHTML = contentHTML
        self.contentText = contentText
        self.summary = summary
        self.searchRowID = searchRowID
    }

    // MARK: Hashable

    public func hash(into hasher: inout Hasher) {
        hasher.combine(articleID)
    }

    // MARK: Equatable

    static func == (lhs: ArticleSearchInfo, rhs: ArticleSearchInfo) -> Bool {
        guard lhs.articleID == rhs.articleID, lhs.title == rhs.title else {
            return false
        }
        guard lhs.contentHTML == rhs.contentHTML, lhs.contentText == rhs.contentText else {
            return false
        }
        return lhs.summary == rhs.summary && lhs.searchRowID == rhs.searchRowID
    }
}
/// Table object for the `search` table, which stores a title and body per indexed article.
final class SearchTable: DatabaseTable {

    let name = "search"

    private let queue: RSDatabaseQueue

    // Weak because the articles table holds this search table.
    private weak var articlesTable: ArticlesTable?

    init(queue: RSDatabaseQueue, articlesTable: ArticlesTable) {
        self.queue = queue
        self.articlesTable = articlesTable
    }

    /// Makes sure the given articles are in the search table: articles without a search row
    /// get one, and articles that already have one are refreshed. The work runs inside
    /// `queue.update`. Does nothing when `articleIDs` is empty.
    func ensureIndexedArticles(for articleIDs: Set<String>) {
        guard !articleIDs.isEmpty else {
            return
        }
        queue.update { (database) in
            self.ensureIndexedArticles(articleIDs, database)
        }
    }
}
// MARK: - Private
private extension SearchTable {
/// Fetches search info for the given articles, then creates search rows for the ones
/// without a `searchRowID` and refreshes the rows of the ones that have one.
/// Does nothing when the articles table is gone or the fetch returns nil.
func ensureIndexedArticles(_ articleIDs: Set<String>, _ database: FMDatabase) {
    guard let articlesTable = articlesTable,
        let fetchedInfos = articlesTable.fetchArticleSearchInfos(articleIDs, in: database) else {
        return
    }

    // Split the fetched articles by whether they already point at a search row.
    var neverIndexed = Set<ArticleSearchInfo>()
    var alreadyIndexed = Set<ArticleSearchInfo>()
    for info in fetchedInfos {
        if info.searchRowID == nil {
            neverIndexed.insert(info)
        } else {
            alreadyIndexed.insert(info)
        }
    }

    performInitialIndexForArticles(neverIndexed, database)
    updateIndexForArticles(alreadyIndexed, database)
}
/// Performs the initial index for each article in the set.
func performInitialIndexForArticles(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) {
    for article in articles {
        performInitialIndex(article, database)
    }
}
/// Inserts a search row for the article, then stores that row's ID in the
/// article's `searchRowID` column.
func performInitialIndex(_ article: ArticleSearchInfo, _ database: FMDatabase) {
    let newSearchRowID = insert(article, database)
    articlesTable?.updateRowsWithValue(
        newSearchRowID,
        valueKey: DatabaseKey.searchRowID,
        whereKey: DatabaseKey.articleID,
        matches: [article.articleID],
        database: database
    )
}
/// Inserts a row holding the article's title (empty string when nil) and `bodyForIndex`.
///
/// - Returns: The database's last insert row ID, read right after the insert.
func insert(_ article: ArticleSearchInfo, _ database: FMDatabase) -> Int {
    let titleForIndex = article.title ?? ""
    let bodyForIndex = article.bodyForIndex

    let searchRow = NSMutableDictionary()
    searchRow[DatabaseKey.title as NSString] = titleForIndex
    searchRow[DatabaseKey.body as NSString] = bodyForIndex

    insertRow(searchRow, insertType: .normal, in: database)
    return Int(database.lastInsertRowId())
}
/// One row read back from the search table. Hashing uses only `rowID`.
private struct SearchInfo: Hashable {

    let rowID: Int
    let title: String
    let body: String

    /// Reads `rowid`, `title`, and `body` from the current row; a nil title or body becomes an empty string.
    init(row: FMResultSet) {
        rowID = Int(row.longLongInt(forColumn: DatabaseKey.rowID))
        title = row.string(forColumn: DatabaseKey.title) ?? ""
        body = row.string(forColumn: DatabaseKey.body) ?? ""
    }

    // MARK: Hashable

    public func hash(into hasher: inout Hasher) {
        hasher.combine(rowID)
    }
}
/// Refreshes the search rows of articles that already have a `searchRowID`.
/// Does nothing for an empty set.
func updateIndexForArticles(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) {
    guard !articles.isEmpty else {
        return
    }

    guard let existingRows = fetchSearchInfos(articles, database) else {
        // These articles have a non-nil searchRowID, so their rows should have been found
        // in the search table. They weren't — recover by doing an initial index.
        performInitialIndexForArticles(articles, database)
        return
    }

    // Look up rows by rowID, keeping the first row seen for any given rowID.
    var rowsByID = [Int: SearchInfo]()
    for row in existingRows where rowsByID[row.rowID] == nil {
        rowsByID[row.rowID] = row
    }

    for article in articles {
        updateIndexForArticle(article, rowsByID, database)
    }
}
/// Brings one article's search row up to date, writing only the columns (title, body)
/// that differ from what the row currently holds. Writes nothing when both match.
private func updateIndexForArticle(_ article: ArticleSearchInfo, _ searchInfosDictionary: [Int: SearchInfo], _ database: FMDatabase) {
    guard let searchRowID = article.searchRowID else {
        assertionFailure("Expected article.searchRowID, got nil")
        return
    }

    guard let existingRow = searchInfosDictionary[searchRowID] else {
        // Shouldn't happen: the article has a searchRowID, but that row wasn't found in the search table.
        // Easy to recover from — do an initial index, and all's well.
        performInitialIndex(article, database)
        return
    }

    let currentTitle = article.title ?? ""
    let titleChanged = currentTitle != existingRow.title
    let bodyChanged = article.bodyForIndex != existingRow.body
    guard titleChanged || bodyChanged else {
        return
    }

    let changedColumns = NSMutableDictionary()
    if titleChanged {
        changedColumns.setObject(currentTitle, forKey: DatabaseKey.title as NSString)
    }
    if bodyChanged {
        changedColumns.setObject(article.bodyForIndex, forKey: DatabaseKey.body as NSString)
    }
    updateRowsWithDictionary(changedColumns, whereKey: DatabaseKey.rowID, matches: existingRow.rowID, database: database)
}
/// Fetches the search-table rows whose rowid matches the `searchRowID` of any given article.
///
/// - Returns: The rows that were found, or `nil` when no article has a `searchRowID`
///   or the query fails.
private func fetchSearchInfos(_ articles: Set<ArticleSearchInfo>, _ database: FMDatabase) -> Set<SearchInfo>? {
    var rowIDs = [Int]()
    for article in articles {
        if let rowID = article.searchRowID {
            rowIDs.append(rowID)
        }
    }
    if rowIDs.isEmpty {
        return nil
    }

    let placeholders = NSString.rs_SQLValueList(withPlaceholders: UInt(rowIDs.count))!
    let sql = "select rowid, title, body from \(name) where rowid in \(placeholders);"
    guard let resultSet = database.executeQuery(sql, withArgumentsIn: rowIDs) else {
        return nil
    }
    return resultSet.mapToSet { (row) -> SearchInfo? in
        return SearchInfo(row: row)
    }
}
}