From 414f979f94e23b74695328a3b79a4fc4d260f155 Mon Sep 17 00:00:00 2001 From: Justin Mazzocchi <2831158+jzzocc@users.noreply.github.com> Date: Sat, 5 Sep 2020 16:33:09 -0700 Subject: [PATCH] Access control and genericization --- .../CodableBloomFilter/BloomFilter.swift | 36 ++++++++++--------- .../DeterministicallyHashable.swift | 7 ++++ .../Data+DeterministicallyHashable.swift | 7 ++++ .../String+DeterministicallyHashable.swift | 7 ++++ .../CodableBloomFilterTests.swift | 6 ++-- 5 files changed, 43 insertions(+), 20 deletions(-) create mode 100644 CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift create mode 100644 CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift create mode 100644 CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift index 7b955f4..de463e0 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift @@ -6,44 +6,44 @@ import Foundation // https://khanlou.com/2018/09/bloom-filters/ // This implementation uses deterministic hashing functions so it can be serialized / deserialized -struct BloomFilter { - let hashes: [Hash] - let bits: Int +public struct BloomFilter { + public let hashes: [Hash] + public let bits: Int private var data: BitArray - init(hashes: [Hash], bits: Int) { + public init(hashes: [Hash], bits: Int) { self.hashes = hashes self.bits = bits data = BitArray(count: bits) } } -extension BloomFilter { +public extension BloomFilter { enum Hash: String, Codable { case djb2 case sdbm } - mutating func insert(_ newMember: String) { + mutating func insert(_ newMember: T) { for index in indices(newMember) { data[index] = true } } - func contains(_ member: String) -> Bool { + func contains(_ member: T) -> Bool { indices(member).map { data[$0] }.allSatisfy { $0 } } } extension BloomFilter: Codable { - enum CodingKeys: String, CodingKey { + private enum CodingKeys: String, CodingKey { case hashes case bits case data } - init(from decoder: Decoder) throws { + public init(from decoder: Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) hashes = try container.decode([Hash].self, forKey: .hashes) @@ -51,7 +51,7 @@ extension BloomFilter: Codable { data = BitArray(data: try container.decode(Data.self, forKey: .data), count: bits) } - func encode(to encoder: Encoder) throws { + public func encode(to encoder: Encoder) throws { var container = encoder.container(keyedBy: CodingKeys.self) try container.encode(hashes, forKey: .hashes) @@ -61,16 +61,18 @@ extension BloomFilter: Codable { } private extension BloomFilter { - func indices(_ string: String) -> [Int] { - hashes.map { abs($0.apply(string)) % bits } + func indices(_ member: T) -> [Int] { + hashes.map { abs($0.apply(member)) % bits } } } // https://gist.github.com/kharrison/2355182ac03b481921073c5cf6d77a73 private extension BloomFilter.Hash { - func apply(_ string: String) -> Int { - string.unicodeScalars.map(\.value).reduce(initial, then) + func apply(_ member: T) -> Int { + Array(member.deterministicallyHashableData) + .map(Int.init) + .reduce(initial, then) } var initial: Int { @@ -80,12 +82,12 @@ private extension BloomFilter.Hash { } } - func then(result: Int, next: UInt32) -> Int { + func then(result: Int, next: Int) -> Int { switch self { case .djb2: - return (result << 5) &+ result &+ Int(next) + return (result << 5) &+ result &+ next case .sdbm: - return Int(next) &+ (result << 6) &+ (result << 16) - result + return next &+ (result << 6) &+ (result << 16) - result } } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift new file mode 100644 index 0000000..0b4d598 --- /dev/null +++ b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift @@ -0,0 +1,7 @@ +// Copyright © 2020 Metabolist. All rights reserved. + +import Foundation + +public protocol DeterministicallyHashable { + var deterministicallyHashableData: Data { get } +} diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift new file mode 100644 index 0000000..02e3450 --- /dev/null +++ b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift @@ -0,0 +1,7 @@ +// Copyright © 2020 Metabolist. All rights reserved. + +import Foundation + +extension Data: DeterministicallyHashable { + public var deterministicallyHashableData: Data { self } +} diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift new file mode 100644 index 0000000..1e36b67 --- /dev/null +++ b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift @@ -0,0 +1,7 @@ +// Copyright © 2020 Metabolist. All rights reserved. + +import Foundation + +extension String: DeterministicallyHashable { + public var deterministicallyHashableData: Data { Data(utf8) } +} diff --git a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift index 165e84c..4a75c05 100644 --- a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift +++ b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift @@ -3,7 +3,7 @@ import XCTest final class CodableBloomFilterTests: XCTestCase { func testContains() { - var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 1024) + var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 1024) sut.insert("lol") sut.insert("ok") @@ -15,7 +15,7 @@ final class CodableBloomFilterTests: XCTestCase { } func testCoding() throws { - var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 64) + var sut = BloomFilter(hashes: [.djb2, .sdbm], bits: 64) let expectedSerialization = Data(#"{"bits":64,"data":"ABAAAAACAJA=","hashes":["djb2","sdbm"]}"#.utf8) sut.insert("lol") @@ -29,7 +29,7 @@ final class CodableBloomFilterTests: XCTestCase { XCTAssertEqual(serialization, expectedSerialization) - let decoded = try JSONDecoder().decode(BloomFilter.self, from: serialization) + let decoded = try JSONDecoder().decode(BloomFilter.self, from: serialization) XCTAssert(decoded.contains("lol")) XCTAssert(decoded.contains("ok"))