Progress on lookup tables.

This commit is contained in:
Brent Simmons 2017-08-06 21:16:13 -07:00
parent 4503f771da
commit 570d70d8fe
6 changed files with 305 additions and 209 deletions

View File

@ -28,6 +28,8 @@ import Data
// Because:
// * They don't take up much space.
// * It seriously cuts down on the number of database reads and writes.
//
// CREATE TABLE if not EXISTS attachments(databaseID TEXT NOT NULL PRIMARY KEY, articleID TEXT NOT NULL, url TEXT NOT NULL, mimeType TEXT, title TEXT, sizeInBytes INTEGER, durationInSeconds INTEGER);
final class AttachmentsTable: DatabaseTable {

View File

@ -23,7 +23,6 @@ final class AuthorsTable: DatabaseTable {
let queue: RSDatabaseQueue
private let cache = ObjectCache<Author>(keyPathForID: \Author.databaseID)
private var articleIDToAuthorsCache = [String: Set<Author>]()
private var articleIDsWithNoAuthors = Set<String>()
private let authorsLookupTable = LookupTable(name: DatabaseTableName.authorsLookup, primaryKey: DatabaseKey.authorID, foreignKey: DatabaseKey.articleID)
init(name: String, queue: RSDatabaseQueue) {
@ -36,15 +35,15 @@ final class AuthorsTable: DatabaseTable {
attachCachedAuthors(articles)
let articlesNeedingAuthors = articlesMissingAuthors(articles)
if articlesNeedingAuthors.isEmpty {
let articlesMissingAuthors = articlesNeedingAuthors(articles)
if articlesMissingAuthors.isEmpty {
return
}
let articleIDs = Set(articlesNeedingAuthors.map { $0.databaseID })
let articleIDs = Set(articlesMissingAuthors.map { $0.databaseID })
let authorTable = fetchAuthorsForArticleIDs(articleIDs, database)
for article in articlesNeedingAuthors {
for article in articlesMissingAuthors {
let articleID = article.databaseID
@ -70,24 +69,16 @@ private extension AuthorsTable {
}
}
func articlesMissingAuthors(_ articles: Set<Article>) -> Set<Article> {
func articlesNeedingAuthors(_ articles: Set<Article>) -> Set<Article> {
return articles.filter{ (article) -> Bool in
if let _ = article.authors {
return false
}
if articleIDsWithNoAuthors.contains(article.databaseID) {
return false
}
return true
}
// If article.authors is nil and article is not known to have zero authors, include it in the set.
let articlesWithNoAuthors = articles.withNilProperty(\Article.authors)
return Set(articlesWithNoAuthors.filter { !articleIDsWithNoAuthors.contains($0.databaseID) })
}
func fetchAuthorsForArticleIDs(_ articleIDs: Set<String>, _ database: FMDatabase) -> [String: Set<Author>]? {
let lookupValues = authorsLookupTable.fetchLookupValues(articleIDs, database: database)
let lookupValueDictionary = authorsLookupTable.fetchLookupTableDictionary(articleIDs, database)
let authorIDs = Set(lookupValues.map { $0.primaryID })
if authorIDs.isEmpty {
return nil

View File

@ -48,3 +48,16 @@ extension Article {
return d.copy() as! NSDictionary
}
}
extension Set where Element == Article {

	/// Returns the articles whose optional property at `keyPath` is nil.
	///
	/// - Parameter keyPath: Key path to an optional property of `Article`.
	/// - Returns: The subset of articles for which that property has no value.
	func withNilProperty<T>(_ keyPath: KeyPath<Article,T?>) -> Set<Article> {

		var articlesWithNil = Set<Article>()
		for article in self where article[keyPath: keyPath] == nil {
			articlesWithNil.insert(article)
		}
		return articlesWithNil
	}

	/// Returns the `databaseID` of every article in the set.
	func articleIDs() -> Set<String> {

		var ids = Set<String>()
		for article in self {
			ids.insert(article.databaseID)
		}
		return ids
	}
}

View File

@ -121,7 +121,7 @@ private extension StatusesTable {
func fetchAndCacheStatusesForArticles(_ articles: Set<Article>, _ database: FMDatabase) {
fetchAndCacheStatusesForArticleIDs(articleIDsFromArticles(articles), database)
fetchAndCacheStatusesForArticleIDs(articles.articleIDs(), database)
}
func fetchAndCacheStatusesForArticleIDs(_ articleIDs: Set<String>, _ database: FMDatabase) {
@ -210,11 +210,6 @@ private extension StatusesTable {
// MARK: Utilities
func articleIDsFromArticles(_ articles: Set<Article>) -> Set<String> {
return Set(articles.map { $0.databaseID })
}
func articleIDsMissingCachedStatuses(_ articleIDs: Set<String>) -> Set<String> {
return Set(articleIDs.filter { !cache.objectWithIDIsCached($0) })
@ -222,13 +217,7 @@ private extension StatusesTable {
func articlesMissingStatuses(_ articles: Set<Article>) -> Set<Article> {
let missing = articles.flatMap { (article) -> Article? in
if article.status == nil {
return article
}
return nil
}
return Set(missing)
return articles.withNilProperty(\Article.status)
}
}

View File

@ -13,10 +13,6 @@ import Data
// Article->tags is a many-to-many relationship.
// Since a tag is just a simple string, the tags table and the lookup table are the same table.
//
// Tags — and the non-existence of tags — are cached, once fetched, for the lifetime of the run.
// This uses some extra memory but cuts way down on the amount of database time spent
// maintaining the tags table.
//
// CREATE TABLE if not EXISTS tags(tagName TEXT NOT NULL, articleID TEXT NOT NULL, PRIMARY KEY(tagName, articleID));
// CREATE INDEX if not EXISTS tags_tagName_index on tags (tagName COLLATE NOCASE);
@ -26,194 +22,214 @@ final class TagsTable: DatabaseTable {
let name: String
let queue: RSDatabaseQueue
private var articleIDCache = [String: TagNameSet]() // articleID: tags
private var articleIDsWithNoTags = Set<String>
let lookupTable: LookupTable
init(name: String, queue: RSDatabaseQueue) {
self.name = name
self.queue = queue
self.lookupTable = LookupTable(name: DatabaseTableName.tags, primaryKey: DatabaseKey.tagName, foreignKey: DatabaseKey.articleID)
}
func saveTagsForArticles(_ articles: Set<Article>) {
func attachTags(_ articles: Set<Article>, _ database: FMDatabase) {
var articlesToSaveTags = Set<Article>()
var articlesToRemoveTags = Set<Article>()
articles.forEach { (oneArticle) in
if articleTagsMatchCache(oneArticle) {
return
}
if let tags = oneArticle.tags {
articlesToSaveTags.insert(oneArticle)
}
else {
articlesToRemoveTags.insert(oneArticle)
}
guard let lookupTableDictionary = lookupTable.fetchLookupTableDictionary(articleIDs, database) else {
return
}
if !articlesToSaveTags.isEmpty {
updateTagsForArticles(articlesToSaveTags)
}
if !articlesToRemoveTags.isEmpty {
removeArticleFromTags(articlesToRemoveTags)
for article in articles {
if let lookupValues = lookupTableDictionary[article.databaseID] {
article.tags = lookupValues.tags()
}
}
}
// func saveTagsForArticles(_ articles: Set<Article>) {
//
// var articlesToSaveTags = Set<Article>()
// var articlesToRemoveTags = Set<Article>()
//
// articles.forEach { (oneArticle) in
//
// if articleTagsMatchCache(oneArticle) {
// return
// }
// if let tags = oneArticle.tags {
// articlesToSaveTags.insert(oneArticle)
// }
// else {
// articlesToRemoveTags.insert(oneArticle)
// }
// }
//
// if !articlesToSaveTags.isEmpty {
// updateTagsForArticles(articlesToSaveTags)
// }
//
// if !articlesToRemoveTags.isEmpty {
// removeArticleFromTags(articlesToRemoveTags)
// }
// }
}
private extension TagsTable {
func cacheTagsForArticle(_ article: Article, tags: TagNameSet) {
// func cacheTagsForArticle(_ article: Article, tags: TagNameSet) {
//
// articleIDsWithNoTags.remove(article.articleID)
// articleIDCache[article.articleID] = tags
// }
//
// func cachedTagsForArticleID(_ articleID: String) -> TagNameSet? {
//
// return articleIDsCache[articleID]
// }
//
// func articleTagsMatchCache(_ article: Article) -> Bool {
//
// if let tags = article.tags {
// return tags == articleIDCache[article.articleID]
// }
// return articleIDIsKnowToHaveNoTags(article.articleID)
// }
//
// func articleIDIsKnownToHaveNoTags(_ articleID: String) -> Bool {
//
// return articleIDsWithNoTags.contains(articleID)
// }
//
// func removeTagsFromCacheForArticleID(_ articleID: String) {
//
// articleIDsCache[oneArticleID] = nil
// articleIDsWithNoTags.insert(oneArticleID)
// }
//
// func removeArticleFromTags(_ articles: Set<Article>) {
//
// var articleIDsToRemove = [String]()
//
// articles.forEach { (oneArticle) in
// let oneArticleID = oneArticle.articleID
// if articleIDIsKnownToHaveNoTags(oneArticle) {
// return
// }
// articleIDsToRemove += oneArticleID
// removeTagsFromCacheForArticleID(oneArticleID)
// }
//
// if !articleIDsToRemove.isEmpty {
// queue.update { (database) in
// database.rs_deleteRowsWhereKey(DatabaseKey.articleID, inValues: articleIDsToRemove, tableName: DatabaseTableName.tags)
// }
// }
// }
//
// typealias TagsTable = [String: TagNameSet] // [articleID: Set<tagName>]
//
// func updateTagsForArticles(_ articles: Set<Article>) {
//
// var tagsForArticleIDs = TagsTable()
// articles.forEach { (oneArticle)
// if let tags = oneArticle.tags {
// cacheTagsForArticle(oneArticle, tags)
// tagsForArticleIDs[oneArticle.articleID] = oneArticle.tags
// }
// else {
// assertionFailure("article must have tags")
// }
// }
//
// if tagsForArticleIDs.isEmpty { // Shouldnt be empty
// return
// }
// let articleIDs = tagsForArticleIDs.keys
//
// queue.update { (database) in
//
// let existingTags = self.fetchTagsForArticleIDs(articleIDs, database: database)
// self.syncIncomingAndExistingTags(incomingTags: tagsForArticleIDs, existingTags: existingTags, database: database)
// }
// }
//
// func syncIncomingAndExistingTags(incomingTags: TagsTable, existingTags: TagsTable, database: database) {
//
// for (oneArticleID, oneTagNames) in incomingTags {
// if let existingTagNames = existingTags[oneArticleID] {
// syncIncomingAndExistingTagsForArticleID(oneArticleID, incomingTagNames: oneTagNames, existingTagNames: existingTagNames, database: database)
// }
// else {
// saveIncomingTagsForArticleID(oneArticleID, tagNames: oneTagNames, database: database)
// }
// }
// }
//
// func saveIncomingTagsForArticleID(_ articleID: String, tagNames: TagNameSet, database: FMDatabase) {
//
// // No existing tags in database. Simple save.
//
// for oneTagName in tagNames {
// let oneDictionary = [DatabaseTableName.articleID: articleID, DatabaseTableName.tagName: oneTagName]
// database.rs_insertRow(with: oneDictionary, insertType: .OrIgnore, tableName: DatabaseTableName.tags)
// }
// }
//
// func syncingIncomingAndExistingTagsForArticleID(_ articleID: String, incomingTagNames: TagNameSet, existingTagNames: TagNameSet, database: FMDatabase) {
//
// if incomingTagNames == existingTagNames {
// return
// }
//
// var tagsToRemove = TagNameSet()
// for oneExistingTagName in existingTagNames {
// if !incomingTagNames.contains(oneExistingTagName) {
// tagsToRemove.insert(oneExistingTagName)
// }
// }
//
// var tagsToAdd = TagNameSet()
// for oneIncomingTagName in incomingTagNames {
// if !existingTagNames.contains(oneIncomingTagName) {
// tagsToAdd.insert(oneIncomingTagName)
// }
// }
//
// if !tagsToRemove.isEmpty {
// let placeholders = NSString.rs_SQLValueListWithPlaceholders
// let sql = "delete from \(DatabaseTableName.tags) where \(DatabaseKey.articleID) = ? and \(DatabaseKey.tagName) in "
// database.executeUpdate(sql, withArgumentsIn: [articleID, ])
// }
// }
//
// func fetchTagsForArticleIDs(_ articleIDs: Set<String>, database: FMDatabase) -> TagsTable {
//
// var tagSpecifiers = TagsTable()
//
// guard let rs = database.rs_selectRowsWhereKey(DatabaseKey.articleID, inValues: Array(articleIDs), tableName: DatabaseTableName.tags) else {
// return tagSpecifiers
// }
//
// while rs.next() {
//
// guard let oneTagName = rs.string(forColumn: DatabaseKey.tagName), let oneArticleID = rs.string(forColumn: DatabaseKey.articleID) else {
// continue
// }
// if tagSpecifiers[oneArticleID] == nil {
// tagSpecifiers[oneArticleID] = Set([oneTagName])
// }
// else {
// tagSpecifiers[oneArticleID]!.insert(oneTagName)
// }
// }
//
// return tagSpecifiers
// }
}
articleIDsWithNoTags.remove(article.articleID)
articleIDCache[article.articleID] = tags
}
private extension Set where Element == LookupValue {
func cachedTagsForArticleID(_ articleID: String) -> TagNameSet? {
func tags() -> Set<String> {
return articleIDsCache[articleID]
}
func articleTagsMatchCache(_ article: Article) -> Bool {
if let tags = article.tags {
return tags == articleIDCache[article.articleID]
}
return articleIDIsKnowToHaveNoTags(article.articleID)
}
func articleIDIsKnownToHaveNoTags(_ articleID: String) -> Bool {
return articleIDsWithNoTags.contains(articleID)
}
func removeTagsFromCacheForArticleID(_ articleID: String) {
articleIDsCache[oneArticleID] = nil
articleIDsWithNoTags.insert(oneArticleID)
}
func removeArticleFromTags(_ articles: Set<Article>) {
var articleIDsToRemove = [String]()
articles.forEach { (oneArticle) in
let oneArticleID = oneArticle.articleID
if articleIDIsKnownToHaveNoTags(oneArticle) {
return
}
articleIDsToRemove += oneArticleID
removeTagsFromCacheForArticleID(oneArticleID)
}
if !articleIDsToRemove.isEmpty {
queue.update { (database) in
database.rs_deleteRowsWhereKey(DatabaseKey.articleID, inValues: articleIDsToRemove, tableName: DatabaseTableName.tags)
}
}
}
typealias TagsTable = [String: TagNameSet] // [articleID: Set<tagName>]
func updateTagsForArticles(_ articles: Set<Article>) {
var tagsForArticleIDs = TagsTable()
articles.forEach { (oneArticle)
if let tags = oneArticle.tags {
cacheTagsForArticle(oneArticle, tags)
tagsForArticleIDs[oneArticle.articleID] = oneArticle.tags
}
else {
assertionFailure("article must have tags")
}
}
if tagsForArticleIDs.isEmpty { // Shouldnt be empty
return
}
let articleIDs = tagsForArticleIDs.keys
queue.update { (database) in
let existingTags = self.fetchTagsForArticleIDs(articleIDs, database: database)
self.syncIncomingAndExistingTags(incomingTags: tagsForArticleIDs, existingTags: existingTags, database: database)
}
}
func syncIncomingAndExistingTags(incomingTags: TagsTable, existingTags: TagsTable, database: database) {
for (oneArticleID, oneTagNames) in incomingTags {
if let existingTagNames = existingTags[oneArticleID] {
syncIncomingAndExistingTagsForArticleID(oneArticleID, incomingTagNames: oneTagNames, existingTagNames: existingTagNames, database: database)
}
else {
saveIncomingTagsForArticleID(oneArticleID, tagNames: oneTagNames, database: database)
}
}
}
func saveIncomingTagsForArticleID(_ articleID: String, tagNames: TagNameSet, database: FMDatabase) {
// No existing tags in database. Simple save.
for oneTagName in tagNames {
let oneDictionary = [DatabaseTableName.articleID: articleID, DatabaseTableName.tagName: oneTagName]
database.rs_insertRow(with: oneDictionary, insertType: .OrIgnore, tableName: DatabaseTableName.tags)
}
}
func syncingIncomingAndExistingTagsForArticleID(_ articleID: String, incomingTagNames: TagNameSet, existingTagNames: TagNameSet, database: FMDatabase) {
if incomingTagNames == existingTagNames {
return
}
var tagsToRemove = TagNameSet()
for oneExistingTagName in existingTagNames {
if !incomingTagNames.contains(oneExistingTagName) {
tagsToRemove.insert(oneExistingTagName)
}
}
var tagsToAdd = TagNameSet()
for oneIncomingTagName in incomingTagNames {
if !existingTagNames.contains(oneIncomingTagName) {
tagsToAdd.insert(oneIncomingTagName)
}
}
if !tagsToRemove.isEmpty {
let placeholders = NSString.rs_SQLValueListWithPlaceholders
let sql = "delete from \(DatabaseTableName.tags) where \(DatabaseKey.articleID) = ? and \(DatabaseKey.tagName) in "
database.executeUpdate(sql, withArgumentsIn: [articleID, ])
}
}
func fetchTagsForArticleIDs(_ articleIDs: Set<String>, database: FMDatabase) -> TagsTable {
var tagSpecifiers = TagsTable()
guard let rs = database.rs_selectRowsWhereKey(DatabaseKey.articleID, inValues: Array(articleIDs), tableName: DatabaseTableName.tags) else {
return tagSpecifiers
}
while rs.next() {
guard let oneTagName = rs.string(forColumn: DatabaseKey.tagName), let oneArticleID = rs.string(forColumn: DatabaseKey.articleID) else {
continue
}
if tagSpecifiers[oneArticleID] == nil {
tagSpecifiers[oneArticleID] = Set([oneTagName])
}
else {
tagSpecifiers[oneArticleID]!.insert(oneTagName)
}
}
return tagSpecifiers
return Set(flatMap{ $0.primaryID })
}
}

View File

@ -11,12 +11,19 @@ import Foundation
// Implement a lookup table for a many-to-many relationship.
// Example: CREATE TABLE if not EXISTS authorLookup (authorID TEXT NOT NULL, articleID TEXT NOT NULL, PRIMARY KEY(authorID, articleID));
// authorID is primaryKey; articleID is foreignKey.
//
// foreignIDsWithNoRelationship: caches the foreignIDs where it's known that there's no relationship.
// lookupsByForeignID: caches the LookupValues for a foreignID.
public struct LookupTable {
typealias LookupTableDictionary = [String: Set<LookupValue>] // key is foreignID
public final class LookupTable {
let name: String
let primaryKey: String
let foreignKey: String
private var foreignIDsWithNoRelationship = Set<String>()
private var lookupsByForeignID = LookupTableDictionary()
public init(name: String, primaryKey: String, foreignKey: String) {
@ -25,17 +32,94 @@ public struct LookupTable {
self.foreignKey = foreignKey
}
/// Returns the lookup values for the given foreign IDs, grouped by foreign ID.
///
/// Foreign IDs already recorded in `foreignIDsWithNoRelationship` are skipped.
/// Values already present in `lookupsByForeignID` are reused, and only the
/// remaining foreign IDs are queried from the database. Newly fetched values
/// and newly discovered "no relationship" IDs are recorded in the caches.
///
/// - Parameters:
///   - foreignIDs: The foreign IDs to look up.
///   - database: The database to query for IDs the caches cannot answer.
/// - Returns: nil when there is nothing to look up (the input is empty, or every
///   ID is already known to have no relationship); otherwise a dictionary keyed
///   by foreign ID.
public func fetchLookupTableDictionary(_ foreignIDs: Set<String>, _ database: FMDatabase) -> LookupTableDictionary? {

	let foreignIDsToLookup = foreignIDs.subtracting(foreignIDsWithNoRelationship)
	if foreignIDsToLookup.isEmpty {
		return nil
	}

	var lookupValues = Set<LookupValue>()
	var foreignIDsToFetchFromDatabase = Set<String>()

	// Pull from cache.
	for oneForeignID in foreignIDsToLookup {
		if let cachedLookups = lookupsByForeignID[oneForeignID] {
			lookupValues.formUnion(cachedLookups)
		}
		else {
			foreignIDsToFetchFromDatabase.insert(oneForeignID)
		}
	}

	if !foreignIDsToFetchFromDatabase.isEmpty {
		// Query only the IDs the cache could not answer; asking for the cached
		// ones again would defeat the purpose of the cache.
		if let resultSet = database.rs_selectRowsWhereKey(foreignKey, inValues: Array(foreignIDsToFetchFromDatabase), tableName: name) {
			lookupValues.formUnion(lookupValuesWithResultSet(resultSet))
		}
	}

	cacheNotFoundForeignIDs(lookupValues, foreignIDsToFetchFromDatabase)
	cacheLookupValues(lookupValues)

	return lookupTableDictionary(with: lookupValues)
}
/// Removes all lookups for the given foreign IDs from the cache and the database.
///
/// Foreign IDs already recorded in `foreignIDsWithNoRelationship` are skipped.
/// The remaining IDs are dropped from `lookupsByForeignID`, added to
/// `foreignIDsWithNoRelationship`, and their rows are deleted from the table.
///
/// - Parameters:
///   - foreignIDs: The foreign IDs whose lookups should be removed.
///   - database: The database to delete the rows from.
public func removeLookupsForForeignIDs(_ foreignIDs: Set<String>, _ database: FMDatabase) {

	let foreignIDsToRemove = foreignIDs.subtracting(foreignIDsWithNoRelationship)
	guard !foreignIDsToRemove.isEmpty else {
		return
	}

	for staleForeignID in foreignIDsToRemove {
		lookupsByForeignID.removeValue(forKey: staleForeignID)
	}
	foreignIDsWithNoRelationship.formUnion(foreignIDsToRemove)

	let idsToDelete = Array(foreignIDsToRemove)
	database.rs_deleteRowsWhereKey(foreignKey, inValues: idsToDelete, tableName: name)
}
}
private extension LookupTable {
/// Adds each lookup value to `table`, grouped under its `foreignID`.
///
/// - Parameters:
///   - lookupValues: The lookup values to add.
///   - table: The dictionary (keyed by foreign ID) to add them to.
func addToLookupTableDictionary(_ lookupValues: Set<LookupValue>, _ table: inout LookupTableDictionary) {

	for lookupValue in lookupValues {
		// The dictionary's values are Set<LookupValue>, so store the LookupValue
		// itself rather than its primaryID string.
		table[lookupValue.foreignID, default: Set<LookupValue>()].insert(lookupValue)
	}
}
/// Builds a new dictionary, keyed by foreign ID, from the given lookup values.
///
/// - Parameter lookupValues: The lookup values to group.
/// - Returns: A freshly created `LookupTableDictionary` containing those values.
func lookupTableDictionary(with lookupValues: Set<LookupValue>) -> LookupTableDictionary {

	var groupedByForeignID = LookupTableDictionary()
	addToLookupTableDictionary(lookupValues, &groupedByForeignID)
	return groupedByForeignID
}
/// Adds the given lookup values to the `lookupsByForeignID` cache.
func cacheLookupValues(_ lookupValues: Set<LookupValue>) {
addToLookupTableDictionary(lookupValues, &lookupsByForeignID)
}
/// Records which of the requested foreign IDs produced no lookup values.
///
/// Any ID in `foreignIDs` that does not appear as the `foreignID` of one of the
/// `lookupValues` is added to `foreignIDsWithNoRelationship`.
///
/// - Parameters:
///   - lookupValues: The lookup values that were found.
///   - foreignIDs: The foreign IDs that were looked up.
func cacheNotFoundForeignIDs(_ lookupValues: Set<LookupValue>, _ foreignIDs: Set<String>) {

	let foreignIDsWithLookups = Set(lookupValues.map { $0.foreignID })
	let foreignIDsWithoutLookups = foreignIDs.subtracting(foreignIDsWithLookups)
	foreignIDsWithNoRelationship.formUnion(foreignIDsWithoutLookups)
}
func lookupValuesWithResultSet(_ resultSet: FMResultSet) -> Set<LookupValue> {
return resultSet.mapToSet(lookupValueWithRow)
@ -71,3 +155,4 @@ public struct LookupValue: Hashable {
return lhs.primaryID == rhs.primaryID && lhs.foreignID == rhs.foreignID
}
}