Access control and genericization

This commit is contained in:
Justin Mazzocchi 2020-09-05 16:33:09 -07:00
parent d0e9b2a1e3
commit 414f979f94
No known key found for this signature in database
GPG Key ID: E223E6937AAFB01C
5 changed files with 43 additions and 20 deletions

View File

@ -6,44 +6,44 @@ import Foundation
// https://khanlou.com/2018/09/bloom-filters/ // https://khanlou.com/2018/09/bloom-filters/
// This implementation uses deterministic hashing functions so it can be serialized / deserialized // This implementation uses deterministic hashing functions so it can be serialized / deserialized
struct BloomFilter { public struct BloomFilter<T: DeterministicallyHashable> {
let hashes: [Hash] public let hashes: [Hash]
let bits: Int public let bits: Int
private var data: BitArray private var data: BitArray
init(hashes: [Hash], bits: Int) { public init(hashes: [Hash], bits: Int) {
self.hashes = hashes self.hashes = hashes
self.bits = bits self.bits = bits
data = BitArray(count: bits) data = BitArray(count: bits)
} }
} }
extension BloomFilter { public extension BloomFilter {
enum Hash: String, Codable { enum Hash: String, Codable {
case djb2 case djb2
case sdbm case sdbm
} }
mutating func insert(_ newMember: String) { mutating func insert(_ newMember: T) {
for index in indices(newMember) { for index in indices(newMember) {
data[index] = true data[index] = true
} }
} }
func contains(_ member: String) -> Bool { func contains(_ member: T) -> Bool {
indices(member).map { data[$0] }.allSatisfy { $0 } indices(member).map { data[$0] }.allSatisfy { $0 }
} }
} }
extension BloomFilter: Codable { extension BloomFilter: Codable {
enum CodingKeys: String, CodingKey { private enum CodingKeys: String, CodingKey {
case hashes case hashes
case bits case bits
case data case data
} }
init(from decoder: Decoder) throws { public init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self) let container = try decoder.container(keyedBy: CodingKeys.self)
hashes = try container.decode([Hash].self, forKey: .hashes) hashes = try container.decode([Hash].self, forKey: .hashes)
@ -51,7 +51,7 @@ extension BloomFilter: Codable {
data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits) data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits)
} }
func encode(to encoder: Encoder) throws { public func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self) var container = encoder.container(keyedBy: CodingKeys.self)
try container.encode(hashes, forKey: .hashes) try container.encode(hashes, forKey: .hashes)
@ -61,16 +61,18 @@ extension BloomFilter: Codable {
} }
private extension BloomFilter { private extension BloomFilter {
func indices(_ string: String) -> [Int] { func indices(_ member: T) -> [Int] {
hashes.map { abs($0.apply(string)) % bits } hashes.map { abs($0.apply(member)) % bits }
} }
} }
// https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73 // https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73
private extension BloomFilter.Hash { private extension BloomFilter.Hash {
func apply(_ string: String) -> Int { func apply(_ member: T) -> Int {
string.unicodeScalars.map(\.value).reduce(initial, then) Array(member.deterministicallyHashableData)
.map(Int.init)
.reduce(initial, then)
} }
var initial: Int { var initial: Int {
@ -80,12 +82,12 @@ private extension BloomFilter.Hash {
} }
} }
func then(result: Int, next: UInt32) -> Int { func then(result: Int, next: Int) -> Int {
switch self { switch self {
case .djb2: case .djb2:
return (result << 5) &+ result &+ Int(next) return (result << 5) &+ result &+ next
case .sdbm: case .sdbm:
return Int(next) &+ (result << 6) &+ (result << 16) - result return next &+ (result << 6) &+ (result << 16) - result
} }
} }
} }

View File

@ -0,0 +1,7 @@
// Copyright © 2020 Metabolist. All rights reserved.
import Foundation
/// A type that can supply a `Data` representation of itself for hashing.
///
/// `BloomFilter` is generic over this protocol and feeds the bytes of
/// `deterministicallyHashableData` to its hash functions.
/// NOTE(review): presumably conformers must return the same bytes for equal values
/// across launches so a serialized filter stays valid — confirm with the filter's author.
public protocol DeterministicallyHashable {
/// The bytes that represent this value when it is hashed.
var deterministicallyHashableData: Data { get }
}

View File

@ -0,0 +1,7 @@
// Copyright © 2020 Metabolist. All rights reserved.
import Foundation
extension Data: DeterministicallyHashable {
    /// A `Data` value is its own hashable representation, so the receiver is returned unchanged.
    public var deterministicallyHashableData: Data {
        return self
    }
}

View File

@ -0,0 +1,7 @@
// Copyright © 2020 Metabolist. All rights reserved.
import Foundation
extension String: DeterministicallyHashable {
    /// The string's UTF-8 code units, packed into a `Data` value.
    public var deterministicallyHashableData: Data {
        let encodedUnits = self.utf8
        return Data(encodedUnits)
    }
}

View File

@ -3,7 +3,7 @@ import XCTest
final class CodableBloomFilterTests: XCTestCase { final class CodableBloomFilterTests: XCTestCase {
func testContains() { func testContains() {
var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 1024) var sut = BloomFilter<String>(hashes: [.djb2, .sdbm], bits: 1024)
sut.insert("lol") sut.insert("lol")
sut.insert("ok") sut.insert("ok")
@ -15,7 +15,7 @@ final class CodableBloomFilterTests: XCTestCase {
} }
func testCoding() throws { func testCoding() throws {
var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 64) var sut = BloomFilter<String>(hashes: [.djb2, .sdbm], bits: 64)
let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8) let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8)
sut.insert("lol") sut.insert("lol")
@ -29,7 +29,7 @@ final class CodableBloomFilterTests: XCTestCase {
XCTAssertEqual(serialization, expectedSerialization) XCTAssertEqual(serialization, expectedSerialization)
let decoded = try JSONDecoder().decode(BloomFilter.self, from: serialization) let decoded = try JSONDecoder().decode(BloomFilter<String>.self, from: serialization)
XCTAssert(decoded.contains("lol")) XCTAssert(decoded.contains("lol"))
XCTAssert(decoded.contains("ok")) XCTAssert(decoded.contains("ok"))