From 6e0dcd6398f981fbffdb251590a470259de4044f Mon Sep 17 00:00:00 2001 From: Justin Mazzocchi <2831158+jzzocc@users.noreply.github.com> Date: Mon, 7 Sep 2020 23:49:58 -0700 Subject: [PATCH] Refactoring --- .../Sources/CodableBloomFilter/BitArray.swift | 10 +- .../CodableBloomFilter/BloomFilter.swift | 15 ++- .../DeterministicallyHashable.swift | 2 +- .../Data+DeterministicallyHashable.swift | 2 +- .../String+DeterministicallyHashable.swift | 2 +- .../{DeterministicHasher.swift => Hash.swift} | 34 +++--- .../CodableBloomFilterTests.swift | 102 +++++++++++++++--- .../Services/InstanceFilterService.swift | 2 +- .../InstanceFilterTests.swift | 2 +- 9 files changed, 122 insertions(+), 49 deletions(-) rename CodableBloomFilter/Sources/CodableBloomFilter/{DeterministicHasher.swift => Hash.swift} (63%) diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift b/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift index a589a3f..1c1cdf9 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/BitArray.swift @@ -8,11 +8,7 @@ struct BitArray { private var bytes: [UInt8] init(byteCount: Int) { - self.bytes = [UInt8](repeating: 0, count: byteCount) - } - - init(data: Data) { - bytes = Array(data) + bytes = [UInt8](repeating: 0, count: byteCount) } } @@ -40,9 +36,7 @@ extension BitArray { extension BitArray: Codable { init(from decoder: Decoder) throws { - let container = try decoder.singleValueContainer() - - bytes = Array(try container.decode(Data.self)) + bytes = Array(try decoder.singleValueContainer().decode(Data.self)) } func encode(to encoder: Encoder) throws { diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift index b9e7f4e..3b1d3b7 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/BloomFilter.swift @@ -6,14 +6,19 @@ import Foundation // https://khanlou.com/2018/09/bloom-filters/ // This implementation uses deterministic hashing functions so it can conform to Codable +enum BloomFilterError: Error { + case noHashesProvided +} + public struct BloomFilter: Codable { - public let hashers: [DeterministicHasher] + public let hashes: [Hash] private var data: BitArray - public init(hashers: Set, byteCount: Int) { - // Sort the hashers for consistent decoding output - self.hashers = Array(hashers.sorted { $0.rawValue < $1.rawValue }) + public init(hashes: Set, byteCount: Int) throws { + guard !hashes.isEmpty else { throw BloomFilterError.noHashesProvided } + // Sort the hashes for consistent decoding output + self.hashes = Array(hashes.sorted { $0.rawValue < $1.rawValue }) data = BitArray(byteCount: byteCount) } } @@ -32,6 +37,6 @@ public extension BloomFilter { private extension BloomFilter { func indices(_ member: T) -> [Int] { - hashers.map { abs($0.apply(member)) % data.bitCount } + hashes.map { abs($0.apply(member)) % data.bitCount } } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift index 33f42e5..7c9f2b2 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicallyHashable.swift @@ -3,5 +3,5 @@ import Foundation public protocol DeterministicallyHashable { - var hashableData: Data { get } + var dataForHashingDeterministically: Data { get } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift index 39eebde..d336139 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/Data+DeterministicallyHashable.swift @@ -3,5 +3,5 @@ import Foundation extension Data: DeterministicallyHashable { - public var hashableData: Data { self } + public var dataForHashingDeterministically: Data { self } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift index dfa7683..177f563 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/Extensions/String+DeterministicallyHashable.swift @@ -3,5 +3,5 @@ import Foundation extension String: DeterministicallyHashable { - public var hashableData: Data { Data(utf8) } + public var dataForHashingDeterministically: Data { Data(utf8) } } diff --git a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicHasher.swift b/CodableBloomFilter/Sources/CodableBloomFilter/Hash.swift similarity index 63% rename from CodableBloomFilter/Sources/CodableBloomFilter/DeterministicHasher.swift rename to CodableBloomFilter/Sources/CodableBloomFilter/Hash.swift index ef883c5..90f0f97 100644 --- a/CodableBloomFilter/Sources/CodableBloomFilter/DeterministicHasher.swift +++ b/CodableBloomFilter/Sources/CodableBloomFilter/Hash.swift @@ -2,17 +2,17 @@ import Foundation -public enum DeterministicHasher: String, Codable { - case djb2 - case djb2a - case sdbm - case fnv1 - case fnv1a +public enum Hash: String, Codable { + case djb232 + case djb2a32 + case sdbm32 + case fnv132 + case fnv1a32 } -extension DeterministicHasher { +extension Hash { func apply(_ hashable: DeterministicallyHashable) -> Int { - Int(Array(hashable.hashableData) + Int(Array(hashable.dataForHashingDeterministically) .map(UInt32.init) .reduce(offsetBasis, hash)) } @@ -21,28 +21,28 @@ extension DeterministicHasher { // http://www.cse.yorku.ca/~oz/hash.html // http://www.isthe.com/chongo/tech/comp/fnv/ -private extension DeterministicHasher { +private extension Hash { static let fnvPrime: UInt32 = 16777619 var offsetBasis: UInt32 { switch self { - case .djb2, .djb2a: return 5381 - case .sdbm: return 0 - case .fnv1, .fnv1a: return 2166136261 + case .djb232, .djb2a32: return 5381 + case .sdbm32: return 0 + case .fnv132, .fnv1a32: return 2166136261 } } func hash(result: UInt32, next: UInt32) -> UInt32 { switch self { - case .djb2: + case .djb232: return (result << 5) &+ result &+ next - case .djb2a: + case .djb2a32: return (result << 5) &+ result ^ next - case .sdbm: + case .sdbm32: return next &+ (result << 6) &+ (result << 16) &- result - case .fnv1: + case .fnv132: return (result &* Self.fnvPrime) ^ next - case .fnv1a: + case .fnv1a32: return (result ^ next) &* Self.fnvPrime } } diff --git a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift index d86ca22..5e68479 100644 --- a/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift +++ b/CodableBloomFilter/Tests/CodableBloomFilterTests/CodableBloomFilterTests.swift @@ -4,17 +4,26 @@ import XCTest final class CodableBloomFilterTests: XCTestCase { - - func testHashers() { - XCTAssertEqual(DeterministicHasher.djb2.apply("hash"), 2090320585) - XCTAssertEqual(DeterministicHasher.djb2a.apply("hash"), 2087809207) - XCTAssertEqual(DeterministicHasher.sdbm.apply("hash"), 385600046) - XCTAssertEqual(DeterministicHasher.fnv1.apply("hash"), 3616638997) - XCTAssertEqual(DeterministicHasher.fnv1a.apply("hash"), 3469047761) + func testHashes() { + XCTAssertEqual(Hash.djb232.apply("hash"), 2090320585) + XCTAssertEqual(Hash.djb2a32.apply("hash"), 2087809207) + XCTAssertEqual(Hash.sdbm32.apply("hash"), 385600046) + XCTAssertEqual(Hash.fnv132.apply("hash"), 3616638997) + XCTAssertEqual(Hash.fnv1a32.apply("hash"), 3469047761) } - func testContains() { - var sut = BloomFilter(hashers: [.djb2, .sdbm, .fnv1, .fnv1a], byteCount: 128) + func noHashesProvided() throws { + XCTAssertThrowsError(try BloomFilter(hashes: [], byteCount: 8)) { + guard case BloomFilterError.noHashesProvided = $0 else { + XCTFail("Expected no hashers provided error") + + return + } + } + } + + func testContains() throws { + var sut = try BloomFilter(hashes: [.sdbm32, .djb232], byteCount: 8) sut.insert("lol") sut.insert("ok") @@ -26,8 +35,8 @@ final class CodableBloomFilterTests: XCTestCase { } func testCoding() throws { - var sut = BloomFilter(hashers: [.sdbm, .djb2], byteCount: 8) - let expectedSerialization = Data(#"{"data":"ABAAAAACAJA=","hashers":["djb2","sdbm"]}"#.utf8) + var sut = try BloomFilter(hashes: [.sdbm32, .djb232], byteCount: 8) + let expectedData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","sdbm32"]}"#.utf8) sut.insert("lol") sut.insert("ok") @@ -36,15 +45,80 @@ final class CodableBloomFilterTests: XCTestCase { encoder.outputFormatting = .sortedKeys - let serialization = try encoder.encode(sut) + let data = try encoder.encode(sut) - XCTAssertEqual(serialization, expectedSerialization) + XCTAssertEqual(data, expectedData) - let decoded = try JSONDecoder().decode(BloomFilter.self, from: serialization) + let decoded = try JSONDecoder().decode(BloomFilter.self, from: data) XCTAssert(decoded.contains("lol")) XCTAssert(decoded.contains("ok")) XCTAssertFalse(decoded.contains("wtf")) XCTAssertFalse(decoded.contains("no")) } + + func testInvalidHash() throws { + let invalidData = Data(#"{"data":"ABAAAAACAJA=","hashes":["djb232","invalid"]}"#.utf8) + + XCTAssertThrowsError(try JSONDecoder().decode(BloomFilter.self, from: invalidData)) { + guard case DecodingError.dataCorrupted = $0 else { + XCTFail("Expected data corrupted error") + + return + } + } + } + + func testDataEncodingStrategy() throws { + var sut = try BloomFilter(hashes: [.sdbm32, .djb232], byteCount: 8) + let expectedData = Data(#"{"data":"0010000000020090","hashes":["djb232","sdbm32"]}"#.utf8) + + sut.insert("lol") + sut.insert("ok") + + let encoder = JSONEncoder() + + encoder.outputFormatting = .sortedKeys + encoder.dataEncodingStrategy = .custom { data, encoder in + var container = encoder.singleValueContainer() + + try container.encode(data.map { String(format: "%02.2hhx", $0) }.joined()) + } + + let data = try encoder.encode(sut) + + XCTAssertEqual(data, expectedData) + } + + func testDataDecodingStrategy() throws { + let data = Data(#"{"data":"0010000000020090","hashes":["djb232","sdbm32"]}"#.utf8) + let decoder = JSONDecoder() + + decoder.dataDecodingStrategy = .custom { decoder in + let container = try decoder.singleValueContainer() + let string = try container.decode(String.self) + var bytes = [UInt8]() + var i = string.startIndex + + while i != string.endIndex { + let j = string.index(i, offsetBy: 2) + + guard let byte = UInt8(string[i...self, from: data) + + XCTAssert(sut.contains("lol")) + XCTAssert(sut.contains("ok")) + XCTAssertFalse(sut.contains("wtf")) + XCTAssertFalse(sut.contains("no")) + } } diff --git a/ServiceLayer/Sources/ServiceLayer/Services/InstanceFilterService.swift b/ServiceLayer/Sources/ServiceLayer/Services/InstanceFilterService.swift index c35fcfc..0db50e0 100644 --- a/ServiceLayer/Sources/ServiceLayer/Services/InstanceFilterService.swift +++ b/ServiceLayer/Sources/ServiceLayer/Services/InstanceFilterService.swift @@ -62,7 +62,7 @@ private extension InstanceFilterService { static let updatedFilterUserDefaultsKey = "updatedFilter" // Ugly, but baking this into the compiled app instead of loading the data from the bundle is more secure // swiftlint:disable line_length - static let defaultFilterData = #"{"hashers":["djb2","djb2a","fnv1","fnv1a","sdbm"],"data":"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAIAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAgAAAAAQAAAAAABAAACAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAABAAAEAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAIAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAIAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAIAAAQAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAQAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAADAAAAAAAAAAAAA=="}"# + static let defaultFilterData = #"{"hashes":["djb232","djb2a32","fnv132","fnv1a32","sdbm32"],"data":"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAIAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAgAAAAAQAAAAAABAAACAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAABAAAEAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAIAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAIAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAIAAAQAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAQAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAADAAAAAAAAAAAAA=="}"# .data(using: .utf8)! // swiftlint:enable line_length // swiftlint:disable force_try diff --git a/ServiceLayer/Tests/ServiceLayerTests/InstanceFilterTests.swift b/ServiceLayer/Tests/ServiceLayerTests/InstanceFilterTests.swift index dc26715..57e2526 100644 --- a/ServiceLayer/Tests/ServiceLayerTests/InstanceFilterTests.swift +++ b/ServiceLayer/Tests/ServiceLayerTests/InstanceFilterTests.swift @@ -29,7 +29,7 @@ class InstanceFilterServiceTests: XCTestCase { XCTAssertTrue(sut.isFiltered(url: previouslyFilteredInstanceURL)) XCTAssertFalse(sut.isFiltered(url: newlyFilteredInstanceURL)) - var updatedFilter = BloomFilter(hashers: [.djb2, .sdbm], byteCount: 16) + var updatedFilter = try BloomFilter(hashes: [.djb232, .sdbm32], byteCount: 16) updatedFilter.insert("instance.filtered")