Skip to content

Commit 49e14d8

Browse files
committed
feat(data): add BloomFilter; docs + tests + example
1 parent bfd5bd7 commit 49e14d8

6 files changed

Lines changed: 166 additions & 0 deletions

File tree

docs/index.d.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ export const examples: {
9898
readonly applyTreeDiff: 'examples/treeDiff.ts';
9999
readonly UnionFind: 'examples/graph.ts';
100100
readonly BinaryHeap: 'examples/binaryHeap.ts';
101+
readonly BloomFilter: 'examples/bloomFilter.ts';
101102
};
102103
readonly performance: {
103104
readonly debounce: 'examples/requestDedup.ts';
@@ -2943,6 +2944,23 @@ export class BinaryHeap<T> {
29432944
pop(): T | undefined;
29442945
}
29452946

2947+
/**
 * Construction options for a Bloom filter (probabilistic set with no false negatives).
 * Use for: quick membership checks, caching fronts, anti-spam.
 * Import: data/bloomFilter.ts
 */
2952+
export interface BloomFilterOptions {
2953+
/** Total number of bits in the filter (m). */
size: number;
2954+
/** Number of hash functions (k). */
hashes: number;
2955+
/** Optional seed for hashing. */
seed?: number;
2956+
}
2957+
/**
 * Bloom filter: probabilistic set membership with no false negatives.
 * `has` may answer `true` for a value that was never added (false positive).
 */
export class BloomFilter {
2958+
/** Creates a filter from an explicit bit count and hash count. */
constructor(options: BloomFilterOptions);
2959+
/** Adds a value to the filter. */
add(value: string | number | Uint8Array): void;
2960+
/** Checks if a value may be in the set; `false` means it was never added. */
has(value: string | number | Uint8Array): boolean;
2961+
/** Creates a filter sized for the given capacity and false-positive error rate. */
static fromCapacity(capacity: number, errorRate?: number, seed?: number): BloomFilter;
2962+
}
2963+
29462964
/**
29472965
* Disjoint Set Union (Union-Find) with path compression and union by size.
29482966
* Use for: connectivity queries, Kruskal MST, clustering.

examples/bloomFilter.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import { BloomFilter } from '../src/index.js';
2+
3+
// Size the filter for roughly 1000 entries at a ~1% false-positive rate (seed 42).
const bf = BloomFilter.fromCapacity(1000, 0.01, 42);

// Insert the sample members, in order.
['apple', 'banana', 'cherry'].forEach((fruit) => bf.add(fruit));

// 'apple' was added above; 'grape' never was.
const appleSeen = bf.has('apple');
const grapeSeen = bf.has('grape');

console.log('has apple?', appleSeen);
console.log('has grape?', grapeSeen);
12+

src/data/bloomFilter.ts

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/**
 * Bloom filter with double hashing (Kirsch–Mitzenmacher optimisation).
 * Useful for: probabilistic membership checks with no false negatives.
 *
 * This interface carries the construction options for the filter.
 */
5+
export interface BloomFilterOptions {
6+
/**
 * Total number of bits in the filter (m).
 * Must be a positive, finite number; the constructor throws otherwise and
 * drops any fractional part.
 */
7+
size: number;
8+
/**
 * Number of hash functions (k), i.e. bit positions probed per value.
 * Must be a positive, finite number; the constructor throws otherwise and
 * drops any fractional part.
 */
9+
hashes: number;
10+
/**
 * Optional seed for hashing. When omitted the constructor falls back to
 * 0x9e3779b1; the value is coerced to a 32-bit integer.
 */
11+
seed?: number;
12+
}
13+
14+
/**
 * Bloom filter: a fixed-size probabilistic set with no false negatives.
 *
 * Every value is hashed twice with seeded FNV-1a, and the `k` probe positions
 * are derived as `(h1 + i * h2) mod m` (Kirsch–Mitzenmacher double hashing).
 * `has` may return `true` for a value that was never added (false positive),
 * but never returns `false` for a value that was added.
 */
export class BloomFilter {
  /** Packed bit storage: bit `idx` lives in byte `idx >>> 3` under mask `1 << (idx & 7)`. */
  private readonly bits: Uint8Array;
  /** Number of addressable bits (m). */
  private readonly m: number;
  /** Number of probe positions per value (k). */
  private readonly k: number;
  /** Seed mixed into the first hash, stored as a 32-bit integer. */
  private readonly seed: number;

  /**
   * @param options - Bit count (`size`), hash count (`hashes`) and optional `seed`.
   * @throws Error 'Invalid bloom size' when `size` is not finite or, after
   *   dropping its fractional part, falls outside 1..2^32-1.
   * @throws Error 'Invalid hash count' when `hashes` is not finite or, after
   *   dropping its fractional part, falls outside 1..2^31-1.
   */
  constructor(options: BloomFilterOptions) {
    const { size, hashes, seed = 0x9e3779b1 } = options;

    // Validate the *truncated* values. Checking before truncation let 0.5
    // through as m = 0 (every index became NaN, so added values were reported
    // missing) and let sizes >= 2^32 wrap around silently.
    const m = Math.trunc(size);
    const k = Math.trunc(hashes);
    // Probe indices are unsigned 32-bit values, so m cannot exceed 2^32 - 1.
    if (!Number.isFinite(size) || m < 1 || m > 0xffffffff) {
      throw new Error('Invalid bloom size');
    }
    if (!Number.isFinite(hashes) || k < 1 || k > 0x7fffffff) {
      throw new Error('Invalid hash count');
    }

    this.m = m;
    this.k = k;
    this.seed = seed | 0;
    this.bits = new Uint8Array(Math.ceil(m / 8));
  }

  /** Adds a value to the filter by setting each of its `k` probe bits. */
  add(value: string | number | Uint8Array): void {
    const { h1, h2 } = this.doubleHash(value);
    for (let i = 0; i < this.k; i += 1) {
      this.setBit(this.indexFor(h1, h2, i));
    }
  }

  /**
   * Checks if a value may be in the set (no false negatives).
   *
   * @returns `false` as soon as one probe bit is unset, otherwise `true`.
   */
  has(value: string | number | Uint8Array): boolean {
    const { h1, h2 } = this.doubleHash(value);
    for (let i = 0; i < this.k; i += 1) {
      if (!this.getBit(this.indexFor(h1, h2, i))) return false;
    }
    return true;
  }

  /**
   * Creates a Bloom filter sized for the given capacity and error rate, using
   * `m = ceil(-n * ln(p) / ln(2)^2)` bits and `k = max(1, round(m / n * ln 2))`
   * hashes.
   *
   * @param capacity - Expected number of distinct items (n); must be > 0.
   * @param errorRate - Target false-positive probability (p), strictly between 0 and 1.
   * @param seed - Optional hash seed forwarded to the constructor.
   * @throws Error when `capacity` is not > 0 (including NaN), when `errorRate`
   *   is outside (0,1), or when the derived size is rejected by the constructor.
   */
  static fromCapacity(capacity: number, errorRate = 0.01, seed?: number): BloomFilter {
    // Negated comparison so that NaN is rejected here as well.
    if (!(capacity > 0)) throw new Error('Capacity must be > 0');
    if (!(errorRate > 0 && errorRate < 1)) throw new Error('Error rate must be in (0,1)');
    const ln2 = Math.log(2);
    const m = Math.ceil(-(capacity * Math.log(errorRate)) / (ln2 * ln2));
    const k = Math.max(1, Math.round((m / capacity) * ln2));
    return new BloomFilter({ size: m, hashes: k, seed });
  }

  // ---- internals ----

  /** Returns probe position `i`: `(h1 + i*h2)` wrapped to unsigned 32 bits, then reduced mod m. */
  private indexFor(h1: number, h2: number, i: number): number {
    const x = (h1 + Math.imul(i, h2)) >>> 0;
    return x % this.m;
  }

  /** Sets bit `idx` in the packed array. */
  private setBit(idx: number): void {
    // Unsigned shift keeps the byte index non-negative for idx >= 2^31.
    const byte = idx >>> 3;
    const mask = 1 << (idx & 7);
    this.bits[byte] = (this.bits[byte] ?? 0) | mask;
  }

  /** Reports whether bit `idx` is set in the packed array. */
  private getBit(idx: number): boolean {
    const byte = idx >>> 3;
    const mask = 1 << (idx & 7);
    return ((this.bits[byte] ?? 0) & mask) !== 0;
  }

  /** Derives the two 32-bit hashes that drive the probe sequence. */
  private doubleHash(value: string | number | Uint8Array): { h1: number; h2: number } {
    const bytes = toBytes(value);
    // Two 32-bit hashes derived from FNV-1a mixed with seed
    const h1 = fnv1a(bytes, this.seed);
    const h2 = fnv1a(bytes, h1 ^ 0x85ebca6b);
    // Force the step odd (hence non-zero) to avoid repeating the same position
    return { h1, h2: (h2 | 1) >>> 0 };
  }
}
87+
88+
/**
 * Converts a supported value into the byte sequence that gets hashed.
 *
 * - strings are encoded with `TextEncoder` (UTF-8);
 * - numbers are written as a little-endian float64 (8 bytes);
 * - `Uint8Array` inputs are returned as-is (same reference, no copy).
 *
 * @param value - The value to serialise.
 * @returns The bytes to feed into the hash function.
 */
function toBytes(value: string | number | Uint8Array): Uint8Array {
  if (typeof value === 'string') {
    return new TextEncoder().encode(value);
  }
  if (typeof value === 'number') {
    const view = new DataView(new ArrayBuffer(8));
    // 0 and -0 compare equal in JS but differ in their sign bit; collapse them
    // so that a value added as 0 is still found when queried as -0.
    view.setFloat64(0, value === 0 ? 0 : value, true);
    return new Uint8Array(view.buffer);
  }
  return value;
}
99+
100+
/**
 * FNV-1a 32-bit hash with an optional seed.
 *
 * @param data - Bytes to hash.
 * @param seed - Value XOR-ed into the FNV offset basis before hashing; with
 *   the default of 0 the result is plain FNV-1a.
 * @returns The hash as an unsigned 32-bit integer.
 */
function fnv1a(data: Uint8Array, seed = 0): number {
  const prime = 0x01000193;
  let hash = (0x811c9dc5 ^ seed) >>> 0;
  // for…of yields plain numbers, so no possibly-undefined indexed read is
  // involved when `noUncheckedIndexedAccess` is enabled.
  for (const byte of data) {
    // Math.imul performs the 32-bit wrapping multiply that FNV requires.
    hash = Math.imul(hash ^ byte, prime);
  }
  return hash >>> 0;
}
109+
110+
// Re-exposes the module-private fnv1a helper on an exported object.
// NOTE(review): presumably a hook for unit tests — confirm intended consumers.
export const __internals = { fnv1a };

src/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ export const examples = {
9696
applyTreeDiff: 'examples/treeDiff.ts',
9797
UnionFind: 'examples/graph.ts',
9898
BinaryHeap: 'examples/binaryHeap.ts',
99+
BloomFilter: 'examples/bloomFilter.ts',
99100
},
100101
performance: {
101102
debounce: 'examples/requestDedup.ts',
@@ -1033,6 +1034,12 @@ export { UnionFind } from './data/unionFind.js';
10331034
* Example file: examples/binaryHeap.ts
10341035
*/
10351036
export { BinaryHeap } from './data/binaryHeap.js';
1037+
/**
1038+
* Bloom filter (probabilistic membership, no false negatives).
1039+
*
1040+
* Example file: examples/bloomFilter.ts
1041+
*/
1042+
export { BloomFilter } from './data/bloomFilter.js';
10361043

10371044
export type {
10381045
TreeNode,

tests/bloomFilter.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { BloomFilter } from '../src/index.js';
3+
4+
describe('BloomFilter', () => {
  it('adds items and reports membership without false negatives', () => {
    const filter = BloomFilter.fromCapacity(1000, 0.01, 123);

    // Insert 200 distinct keys; every one of them must be reported as present.
    const inserted: string[] = [];
    for (let i = 0; i < 200; i += 1) {
      inserted.push(`key-${i}`);
    }
    inserted.forEach((key) => {
      filter.add(key);
    });
    inserted.forEach((key) => {
      expect(filter.has(key)).toBe(true);
    });

    // Probe 50 keys that were never inserted. False positives are allowed, so
    // only require that the filter rejects at least one of them.
    const answers: boolean[] = [];
    for (let i = 0; i < 50; i += 1) {
      answers.push(filter.has(`other-${i + 10000}`));
    }
    expect(answers.includes(false)).toBe(true);
  });
});
18+

tests/index.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ describe('package entry point', () => {
125125
| 'applyTreeDiff'
126126
| 'UnionFind'
127127
| 'BinaryHeap'
128+
| 'BloomFilter'
128129
>();
129130

130131
expectTypeOf<ExampleName<'search'>>().toEqualTypeOf<

0 commit comments

Comments
 (0)