|
/**
 * Bloom filter with double hashing (Kirsch–Mitzenmacher optimisation).
 * Useful for: probabilistic membership checks with no false negatives.
 */

/**
 * Construction parameters for a `BloomFilter`.
 *
 * `size` and `hashes` must be positive, finite numbers; the filter truncates
 * fractional values to integers before using them.
 */
export interface BloomFilterOptions {
  /** Total number of bits in the filter (m). */
  size: number;
  /** Number of hash functions (k), i.e. bit positions probed per value. */
  hashes: number;
  /** Optional seed for hashing. The filter falls back to 0x9e3779b1 when omitted. */
  seed?: number;
}
| 13 | + |
/**
 * Fixed-size Bloom filter backed by a packed bit array.
 *
 * Each value is hashed twice with seeded FNV-1a and the `k` probe positions
 * are derived as `(h1 + i * h2) mod 2^32 mod m`. A value that was added is
 * always reported by `has`; a value that was never added may occasionally be
 * reported as present (false positive).
 */
export class BloomFilter {
  /** Largest addressable bit count: probe positions come from 32-bit hashes. */
  private static readonly MAX_BITS = 0x100000000;

  private readonly bits: Uint8Array;
  private readonly m: number;
  private readonly k: number;
  private readonly seed: number;

  /**
   * @param options - Bit count, hash count and optional seed.
   * @throws Error('Invalid bloom size') if `size` is not finite, truncates to
   *   less than 1, or exceeds 2^32 bits.
   * @throws Error('Invalid hash count') if `hashes` is not finite or truncates
   *   to less than 1.
   */
  constructor(options: BloomFilterOptions) {
    const { size, hashes, seed = 0x9e3779b1 } = options;
    // Validate the *truncated* values: checking before truncation let
    // fractions in (0, 1) through as m = 0 / k = 0, and `| 0` wrapped
    // anything >= 2^31 to a negative or zero count.
    const bitCount = Math.floor(size);
    const hashCount = Math.floor(hashes);
    if (!Number.isFinite(size) || bitCount < 1 || bitCount > BloomFilter.MAX_BITS) {
      throw new Error('Invalid bloom size');
    }
    if (!Number.isFinite(hashes) || hashCount < 1) {
      throw new Error('Invalid hash count');
    }
    this.m = bitCount;
    this.k = hashCount;
    this.seed = seed | 0;
    this.bits = new Uint8Array(Math.ceil(this.m / 8));
  }

  /** Adds a value to the filter by setting all of its probe bits. */
  add(value: string | number | Uint8Array): void {
    const { h1, h2 } = this.doubleHash(value);
    for (let i = 0; i < this.k; i += 1) {
      this.setBit(this.indexFor(h1, h2, i));
    }
  }

  /**
   * Checks if a value may be in the set (no false negatives).
   *
   * @returns `false` if the value was definitely never added, `true` if it
   *   was added or collides with previously added values.
   */
  has(value: string | number | Uint8Array): boolean {
    const { h1, h2 } = this.doubleHash(value);
    for (let i = 0; i < this.k; i += 1) {
      if (!this.getBit(this.indexFor(h1, h2, i))) return false;
    }
    return true;
  }

  /**
   * Creates a Bloom filter sized for the given capacity and error rate,
   * using m = -n·ln(p) / ln(2)^2 bits and k = (m / n)·ln(2) hash functions.
   *
   * @param capacity - Expected number of distinct values (n); must be > 0.
   * @param errorRate - Target false-positive probability (p), in (0, 1).
   * @param seed - Optional hash seed forwarded to the constructor.
   * @throws Error if `capacity` is not greater than 0 (including NaN), if
   *   `errorRate` is outside (0, 1), or if the derived size is invalid.
   */
  static fromCapacity(capacity: number, errorRate = 0.01, seed?: number): BloomFilter {
    // Written as a negated `>` so that NaN is rejected here as well.
    if (!(capacity > 0)) throw new Error('Capacity must be > 0');
    if (!(errorRate > 0 && errorRate < 1)) throw new Error('Error rate must be in (0,1)');
    const ln2 = Math.log(2);
    const m = Math.ceil(-(capacity * Math.log(errorRate)) / (ln2 * ln2));
    const k = Math.max(1, Math.round((m / capacity) * ln2));
    return new BloomFilter({ size: m, hashes: k, seed });
  }

  // ---- internals ----

  /** Returns the i-th probe position: (h1 + i*h2) mod 2^32, reduced mod m. */
  private indexFor(h1: number, h2: number, i: number): number {
    // Math.imul yields a signed 32-bit product; `>>> 0` wraps the sum back
    // into the unsigned 32-bit range before the modulo.
    const x = (h1 + Math.imul(i, h2)) >>> 0;
    return x % this.m;
  }

  /** Sets the bit at position `idx`. */
  private setBit(idx: number): void {
    // Unsigned shift: a signed `>>` would turn indices >= 2^31 negative.
    const byte = idx >>> 3;
    const mask = 1 << (idx & 7);
    this.bits[byte] |= mask;
  }

  /** Reports whether the bit at position `idx` is set. */
  private getBit(idx: number): boolean {
    const byte = idx >>> 3;
    const mask = 1 << (idx & 7);
    return (this.bits[byte] & mask) !== 0;
  }

  /** Derives the two unsigned 32-bit base hashes used for double hashing. */
  private doubleHash(value: string | number | Uint8Array): { h1: number; h2: number } {
    const bytes = toBytes(value);
    // Two 32-bit hashes derived from FNV-1a mixed with seed; the second is
    // seeded from the first so the two are not trivially correlated.
    const h1 = fnv1a(bytes, this.seed);
    const h2 = fnv1a(bytes, h1 ^ 0x85ebca6b);
    // Force the step odd (hence non-zero) to avoid repeating the same position.
    return { h1, h2: (h2 | 1) >>> 0 };
  }
}
| 87 | + |
/**
 * Converts a value to the byte sequence that gets hashed.
 *
 * - Strings are encoded as UTF-8.
 * - Numbers are written as 8-byte little-endian IEEE-754 doubles.
 * - `Uint8Array` inputs are returned as-is (not copied).
 *
 * @param value - The value to serialise.
 * @returns The bytes representing `value`.
 */
function toBytes(value: string | number | Uint8Array): Uint8Array {
  if (typeof value === 'string') {
    return new TextEncoder().encode(value);
  }
  if (typeof value === 'number') {
    const view = new DataView(new ArrayBuffer(8));
    // -0 and +0 compare equal but differ in their sign bit; normalise so that
    // equal numbers always produce identical bytes.
    view.setFloat64(0, Object.is(value, -0) ? 0 : value, true);
    return new Uint8Array(view.buffer);
  }
  return value;
}
| 99 | + |
/**
 * FNV-1a 32-bit hash.
 *
 * @param data - Bytes to hash.
 * @param seed - Value XOR-ed into the standard offset basis before hashing;
 *   with the default of 0 this is plain FNV-1a.
 * @returns The hash as an unsigned 32-bit integer.
 */
function fnv1a(data: Uint8Array, seed = 0): number {
  const offsetBasis = 0x811c9dc5;
  const prime = 0x01000193;
  let hash = (offsetBasis ^ seed) >>> 0;
  for (const byte of data) {
    hash ^= byte;
    // Math.imul keeps the multiplication in 32-bit integer arithmetic.
    hash = Math.imul(hash, prime);
  }
  // Math.imul returns a signed value; convert back to unsigned.
  return hash >>> 0;
}
| 109 | + |
/**
 * Internal helpers exposed outside the module (currently only `fnv1a`).
 * NOTE(review): presumably exported for unit tests — confirm before relying on it.
 */
export const __internals = { fnv1a };
0 commit comments