diff --git a/Metatext.xcodeproj/project.pbxproj b/Metatext.xcodeproj/project.pbxproj index 7f69cad..04159c1 100644 --- a/Metatext.xcodeproj/project.pbxproj +++ b/Metatext.xcodeproj/project.pbxproj @@ -86,6 +86,7 @@ D047FA8C24C3E21200AF17C5 /* Metatext.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Metatext.app; sourceTree = BUILT_PRODUCTS_DIR; }; D0666A2124C677B400F3F04B /* Tests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = Tests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; D0666A2524C677B400F3F04B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + D07E164425037264008B10D0 /* SerializableBloomFilter */ = {isa = PBXFileReference; lastKnownFileType = folder; path = SerializableBloomFilter; sourceTree = ""; }; D085C3BB25008DEC008A6C5E /* DB */ = {isa = PBXFileReference; lastKnownFileType = folder; path = DB; sourceTree = ""; }; D0BDF66524FD7A6400C7FA1C /* ServiceLayer */ = {isa = PBXFileReference; lastKnownFileType = folder; path = ServiceLayer; sourceTree = ""; }; D0BEB1F224F8EE8C001B0F04 /* AttachmentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AttachmentView.swift; sourceTree = ""; }; @@ -181,6 +182,7 @@ D047FA7F24C3E21000AF17C5 = { isa = PBXGroup; children = ( + D07E164425037264008B10D0 /* SerializableBloomFilter */, D0C7D45224F76169001EBDBB /* Assets.xcassets */, D085C3BB25008DEC008A6C5E /* DB */, D0C7D46824F76169001EBDBB /* Extensions */, diff --git a/SerializableBloomFilter/.gitignore b/SerializableBloomFilter/.gitignore new file mode 100644 index 0000000..95c4320 --- /dev/null +++ b/SerializableBloomFilter/.gitignore @@ -0,0 +1,5 @@ +.DS_Store +/.build +/Packages +/*.xcodeproj +xcuserdata/ diff --git a/SerializableBloomFilter/Package.swift b/SerializableBloomFilter/Package.swift new file mode 100644 index 0000000..4625cf3 --- /dev/null +++ 
b/SerializableBloomFilter/Package.swift
@@ -0,0 +1,25 @@
+// swift-tools-version:5.3
+
+import PackageDescription
+
+let package = Package(
+    name: "SerializableBloomFilter",
+    platforms: [
+        .iOS(.v14),
+        .macOS(.v11)
+    ],
+    products: [
+        .library(
+            name: "SerializableBloomFilter",
+            targets: ["SerializableBloomFilter"])
+    ],
+    dependencies: [],
+    targets: [
+        .target(
+            name: "SerializableBloomFilter",
+            dependencies: []),
+        .testTarget(
+            name: "SerializableBloomFilterTests",
+            dependencies: ["SerializableBloomFilter"])
+    ]
+)
diff --git a/SerializableBloomFilter/Sources/SerializableBloomFilter/Bits.swift b/SerializableBloomFilter/Sources/SerializableBloomFilter/Bits.swift
new file mode 100644
index 0000000..6b63972
--- /dev/null
+++ b/SerializableBloomFilter/Sources/SerializableBloomFilter/Bits.swift
@@ -0,0 +1,85 @@
+// Copyright © 2020 Metabolist. All rights reserved.
+
+// Adapted from https://github.com/dduan/BitArray
+
+import Foundation
+
+/// A fixed-size sequence of bits packed eight to a byte.
+///
+/// Bit `index` lives in byte `index / 8` at bit position `index % 8`, where position 0 is the
+/// least significant bit of that byte.
+struct Bits {
+    /// The number of addressable bits.
+    let count: Int
+
+    private var bytes: [UInt8]
+
+    /// Creates `count` bits, all cleared.
+    ///
+    /// - Parameter count: The number of bits; must not be negative.
+    init(count: Int) {
+        precondition(count >= 0, "Invalid count: \(count)")
+
+        self.count = count
+        bytes = [UInt8](repeating: 0, count: Self.byteCount(forBitCount: count))
+    }
+
+    /// Creates bits on top of previously stored bytes (see `data`).
+    ///
+    /// - Parameters:
+    ///   - bytes: The packed storage; must hold exactly `byteCount(forBitCount: count)` bytes.
+    ///   - count: The number of addressable bits; must not be negative.
+    init(bytes: [UInt8], count: Int) {
+        precondition(count >= 0, "Invalid count: \(count)")
+        precondition(
+            bytes.count == Self.byteCount(forBitCount: count),
+            "Invalid byte count \(bytes.count) for \(count) bits")
+
+        self.bytes = bytes
+        self.count = count
+    }
+}
+
+extension Bits {
+    /// The packed bytes, in the layout `init(bytes:count:)` expects.
+    var data: Data { Data(bytes) }
+
+    /// Returns the number of bytes needed to store `bitCount` bits.
+    static func byteCount(forBitCount bitCount: Int) -> Int {
+        let (byteCount, bitRemainder) = bitCount.quotientAndRemainder(dividingBy: bitsInByte)
+
+        return bitRemainder > 0 ? byteCount + 1 : byteCount
+    }
+
+    /// Accesses the bit at `index`, which must be in `0..<count`.
+    subscript(index: Int) -> Bool {
+        get {
+            let (byteIndex, mask) = location(of: index)
+
+            return bytes[byteIndex] & mask != 0
+        }
+
+        set {
+            let (byteIndex, mask) = location(of: index)
+
+            if newValue {
+                bytes[byteIndex] |= mask
+            } else {
+                bytes[byteIndex] &= ~mask
+            }
+        }
+    }
+}
+
+private extension Bits {
+    static let bitsInByte = 8
+
+    /// Splits a bit index into the index of its byte and the mask selecting it in that byte.
+    func location(of index: Int) -> (byteIndex: Int, mask: UInt8) {
+        precondition(index >= 0 && index < count, "Invalid index: \(index)")
+
+        let (byteIndex, bitPosition) = index.quotientAndRemainder(dividingBy: Self.bitsInByte)
+
+        return (byteIndex, UInt8(1) << bitPosition)
+    }
+}
diff --git a/SerializableBloomFilter/Sources/SerializableBloomFilter/SerializableBloomFilter.swift b/SerializableBloomFilter/Sources/SerializableBloomFilter/SerializableBloomFilter.swift
new file mode 100644
index 0000000..dcc681b
--- /dev/null
+++ b/SerializableBloomFilter/Sources/SerializableBloomFilter/SerializableBloomFilter.swift
@@ -0,0 +1,89 @@
+// Copyright © 2020 Metabolist. All rights reserved.
+
+import Foundation
+
+// https://en.wikipedia.org/wiki/Bloom_filter
+// https://khanlou.com/2018/09/bloom-filters/
+// This implementation uses deterministic hashing functions so it can be serialized / deserialized
+
+/// Errors thrown while restoring a `SerializableBloomFilter` from its serialization.
+enum SerializableBloomFilterError: Error, Equatable {
+    /// The serialization holds `actual` bytes where a filter takes exactly `expected`.
+    case invalidByteCount(expected: Int, actual: Int)
+}
+
+/// A Bloom filter over strings with a fixed number of bits and a fixed set of hash functions.
+struct SerializableBloomFilter {
+    private var items: Bits
+
+    /// Creates an empty filter.
+    init() {
+        items = Bits(count: Self.itemCount)
+    }
+
+    /// Restores a filter from the value of another filter's `serialization`.
+    ///
+    /// - Parameter serialization: The packed bits of a filter.
+    /// - Throws: `SerializableBloomFilterError.invalidByteCount` when `serialization` does not
+    ///   have the byte count of a serialized filter.
+    init(serialization: Data) throws {
+        let expectedByteCount = Bits.byteCount(forBitCount: Self.itemCount)
+
+        guard serialization.count == expectedByteCount else {
+            throw SerializableBloomFilterError.invalidByteCount(
+                expected: expectedByteCount,
+                actual: serialization.count)
+        }
+
+        items = Bits(bytes: Array(serialization), count: Self.itemCount)
+    }
+}
+
+extension SerializableBloomFilter {
+    /// The filter's bits packed into bytes, as accepted by `init(serialization:)`.
+    var serialization: Data { items.data }
+
+    /// Records `newMember` by setting the bit each hash function maps it to.
+    mutating func insert(_ newMember: String) {
+        for index in Self.indices(newMember) {
+            items[index] = true
+        }
+    }
+
+    /// Returns whether every bit `member` hashes to is set.
+    ///
+    /// `false` means `member` was never inserted; `true` means it may have been, since other
+    /// members can have set the same bits.
+    func contains(_ member: String) -> Bool {
+        Self.indices(member).allSatisfy { items[$0] }
+    }
+}
+
+private extension SerializableBloomFilter {
+    static let itemCount = 1024
+    static let hashFunctions = [djb2, sdbm]
+
+    /// Returns the bit index each hash function assigns to `string`.
+    static func indices(_ string: String) -> [Int] {
+        // `magnitude` gives the same result as `abs(_:)` but cannot trap on `Int.min`,
+        // which the wrapping hash functions are free to return.
+        hashFunctions.map { Int($0(string).magnitude % UInt(itemCount)) }
+    }
+}
+
+// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
+
+// Both hashes rely on wrapping arithmetic: the running value is expected to overflow `Int`.
+
+private func djb2(_ string: String) -> Int {
+    string.unicodeScalars.map(\.value).reduce(5381) {
+        ($0 << 5) &+ $0 &+ Int($1)
+    }
+}
+
+private func sdbm(_ string: String) -> Int {
+    string.unicodeScalars.map(\.value).reduce(0) {
+        // `&-` rather than `-`, which traps once the running value has wrapped around
+        Int($1) &+ ($0 << 6) &+ ($0 << 16) &- $0
+    }
+}
diff --git a/SerializableBloomFilter/Tests/SerializableBloomFilterTests/SerializableBloomFilterTests.swift b/SerializableBloomFilter/Tests/SerializableBloomFilterTests/SerializableBloomFilterTests.swift
new file mode 100644
index 0000000..976c7f9
--- /dev/null
+++ b/SerializableBloomFilter/Tests/SerializableBloomFilterTests/SerializableBloomFilterTests.swift
@@ -0,0 +1,49 @@
+@testable import SerializableBloomFilter
+import XCTest
+
+final class SerializableBloomFilterTests: XCTestCase {
+    func testContains() {
+        var filter = SerializableBloomFilter()
+
+        filter.insert("lol")
+        filter.insert("ok")
+
+        XCTAssert(filter.contains("lol"))
+        XCTAssert(filter.contains("ok"))
+        XCTAssertFalse(filter.contains("wtf"))
+        XCTAssertFalse(filter.contains("no"))
+    }
+
+    // The hashes overflow `Int` for longer members; that must wrap rather than trap.
+    func testContainsLongMember() {
+        var filter = SerializableBloomFilter()
+        let member = String(repeating: "https://example.com/users/someone/", count: 32)
+
+        filter.insert(member)
+
+        XCTAssert(filter.contains(member))
+    }
+
+    func testSerialization() throws {
+        var filter = SerializableBloomFilter()
+
+        filter.insert("lol")
+        filter.insert("ok")
+
+        let serialization = filter.serialization
+        let deserializedFilter = try SerializableBloomFilter(serialization: serialization)
+
+        XCTAssert(deserializedFilter.contains("lol"))
+        XCTAssert(deserializedFilter.contains("ok"))
+        XCTAssertFalse(deserializedFilter.contains("wtf"))
+        XCTAssertFalse(deserializedFilter.contains("no"))
+    }
+
+    func testInvalidSerializationThrows() {
+        XCTAssertThrowsError(try SerializableBloomFilter(serialization: Data())) {
+            XCTAssertEqual(
+                $0 as? SerializableBloomFilterError,
+                .invalidByteCount(expected: 128, actual: 0))
+        }
+    }
+}