1
0
mirror of https://github.com/metabolist/metatext synced 2025-01-14 19:07:05 +01:00

Bloom filter data property and initialization

This commit is contained in:
Justin Mazzocchi 2020-09-08 02:07:15 -07:00
parent 6e0dcd6398
commit f02b1e033a
No known key found for this signature in database
GPG Key ID: E223E6937AAFB01C
3 changed files with 38 additions and 7 deletions

View File

@@ -7,14 +7,16 @@ import Foundation
/// A sequence of bits backed by an array of bytes.
struct BitArray {
    /// Backing storage for the bits.
    private var bytes: [UInt8]

    /// Creates a bit array of `byteCount` bytes with every byte set to zero.
    ///
    /// - Parameter byteCount: Number of bytes of storage to allocate.
    init(byteCount: Int) {
        bytes = [UInt8](repeating: 0, count: byteCount)
    }

    /// Creates a bit array whose storage is a copy of the bytes in `data`.
    ///
    /// - Parameter data: Raw bytes to use as the initial bit storage.
    init(data: Data) {
        bytes = Array(data)
    }
}
extension BitArray {
var bitCount: Int { bytes.count * Self.bitsInByte }
var data: Data { Data(bytes) }
subscript(index: Int) -> Bool {
get {
let (byteIndex, bitIndex) = Self.byteAndBitIndices(index: index)

View File

@@ -11,32 +11,43 @@ enum BloomFilterError: Error {
}
/// A Bloom filter over values of type `T`, coded as its list of hashes plus its bit storage.
public struct BloomFilter<T: DeterministicallyHashable>: Codable {
    /// Coding keys; the bit storage is coded under `"data"` although the
    /// stored property is named `bits`.
    enum CodingKeys: String, CodingKey {
        case hashes
        case bits = "data"
    }

    /// The hashes applied to every member, sorted by raw value.
    public let hashes: [Hash]

    /// Bit storage; the public `data` property exposes it as raw bytes.
    private var bits: BitArray

    /// Creates a filter backed by `byteCount` zeroed bytes.
    ///
    /// - Parameters:
    ///   - hashes: The hashes to apply to each member; must not be empty.
    ///   - byteCount: Number of bytes of bit storage.
    /// - Throws: `BloomFilterError.noHashesProvided` when `hashes` is empty.
    public init(hashes: Set<Hash>, byteCount: Int) throws {
        try self.init(hashes: hashes, data: Data(repeating: 0, count: byteCount))
    }

    /// Creates a filter whose bit storage is initialized from `data`.
    ///
    /// - Parameters:
    ///   - hashes: The hashes to apply to each member; must not be empty.
    ///   - data: Raw bytes to use as the filter's bit storage.
    /// - Throws: `BloomFilterError.noHashesProvided` when `hashes` is empty.
    public init(hashes: Set<Hash>, data: Data) throws {
        guard !hashes.isEmpty else { throw BloomFilterError.noHashesProvided }

        // Sort the hashes for consistent decoding output
        self.hashes = hashes.sorted { $0.rawValue < $1.rawValue }
        bits = BitArray(data: data)
    }
}
public extension BloomFilter {
    /// The filter's bit storage exported as raw bytes.
    var data: Data { bits.data }

    /// Sets the bit at every index that the filter's hashes produce for `newMember`.
    ///
    /// - Parameter newMember: The value to record in the filter.
    mutating func insert(_ newMember: T) {
        for index in indices(newMember) {
            bits[index] = true
        }
    }

    /// Reports whether every bit that the filter's hashes produce for `member` is set.
    ///
    /// - Parameter member: The value to look up.
    /// - Returns: `true` when all of the member's bit indices are set, otherwise `false`.
    func contains(_ member: T) -> Bool {
        indices(member).allSatisfy { bits[$0] }
    }
}
private extension BloomFilter {
    /// Maps `member` to one bit index per hash, in the order of `hashes`.
    ///
    /// Each index is the absolute value of the hash result reduced modulo the
    /// number of bits in the filter's storage.
    ///
    /// - Parameter member: The value to hash.
    /// - Returns: One index into the bit storage for each element of `hashes`.
    func indices(_ member: T) -> [Int] {
        // NOTE(review): an empty bit array makes the divisor zero — confirm
        // callers never create a filter with zero bytes.
        hashes.map { abs($0.apply(member)) % bits.bitCount }
    }
}

View File

@@ -34,6 +34,24 @@ final class CodableBloomFilterTests: XCTestCase {
XCTAssertFalse(sut.contains("no"))
}
/// Inserts two members into an 8-byte filter and checks the exported bytes.
func testData() throws {
    let expectedBytes: [UInt8] = [0, 16, 0, 0, 0, 2, 0, 144]
    var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)

    for member in ["lol", "ok"] {
        sut.insert(member)
    }

    XCTAssertEqual(sut.data, Data(expectedBytes))
}
/// Builds a filter directly from stored bytes and checks membership answers.
func testFromData() throws {
    // The same eight bytes `testData` expects after inserting "lol" and "ok".
    let storedBytes = Data([0, 16, 0, 0, 0, 2, 0, 144])
    let sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], data: storedBytes)

    // Members expected to be reported as present.
    XCTAssert(sut.contains("lol"))
    XCTAssert(sut.contains("ok"))

    // Values expected to be reported as absent.
    XCTAssertFalse(sut.contains("wtf"))
    XCTAssertFalse(sut.contains("no"))
}
func testCoding() throws {
var sut = try BloomFilter<String>(hashes: [.sdbm32, .djb232], byteCount: 8)
let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8)