Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
- [x] Boyer–Moore fast substring search
- [x] Suffix array construction utilities
- [x] Longest common subsequence (LCS) enhancements and diff helpers
- [x] Aho–Corasick multi-pattern automaton
**Data pipelines & utilities**
- [x] Flatten/unflatten helpers for nested structures
- [x] Pagination utilities for client-side paging
Expand Down
16 changes: 16 additions & 0 deletions docs/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ export const examples: {
readonly buildSuffixArray: 'examples/search.ts';
readonly longestCommonSubsequence: 'examples/search.ts';
readonly diffStrings: 'examples/search.ts';
readonly createAhoCorasick: 'examples/search.ts';
};
readonly data: {
readonly diff: 'examples/jsonDiff.ts';
Expand Down Expand Up @@ -2664,6 +2665,21 @@ export interface DiffOp {
export function longestCommonSubsequence(options: LCSOptions): LCSResult;
export function diffStrings(options: LCSOptions): DiffOp[];

/**
* Aho–Corasick multi-pattern automaton.
* Use for: scanning texts for many patterns efficiently with overlaps.
* Performance: O(n + m + z) where n=text length, m=total pattern length, z=matches.
* Import: search/ahoCorasick.ts
*/
export interface AhoBuildOptions {
/** Patterns to search for. An empty or non-array value makes createAhoCorasick throw. */
patterns: ReadonlyArray<string>;
/** When false, patterns and text are lower-cased before matching. Defaults to true. */
caseSensitive?: boolean;
}
export interface AhoAutomaton {
/**
* Scans `text` for all patterns the automaton was built from.
* Returns start offsets keyed by each pattern as originally supplied;
* patterns without a match map to an empty array, and an empty pattern
* maps to every offset from 0 through text.length.
*/
search(text: string): Record<string, number[]>;
}
/**
* Builds a reusable automaton from `options.patterns`.
* Throws an Error when `patterns` is not an array or is empty.
*/
export function createAhoCorasick(options: AhoBuildOptions): AhoAutomaton;

// ============================================================================
// 📊 DATA TOOLS
// ============================================================================
Expand Down
1 change: 1 addition & 0 deletions docs/list.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ Maximum Flow (Ford-Fulkerson) - Network flow

Rabin-Karp - Multiple pattern matching
Boyer-Moore - Fast single pattern search
Aho-Corasick - Multi-pattern automaton
Longest Common Subsequence - Diff algorithms
Suffix Array - Advanced pattern matching

Expand Down
4 changes: 4 additions & 0 deletions examples/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
buildSuffixArray,
longestCommonSubsequence,
diffStrings,
createAhoCorasick,
} from '../src/index.js';

const items = ['alpha', 'beta', 'delta', 'epsilon', 'gamma'];
Expand Down Expand Up @@ -43,3 +44,6 @@ console.log('LCS of dynamic/programming:', lcs);

const diff = diffStrings({ a: 'kitten', b: 'sitting' });
console.log('Diff between kitten and sitting:', diff);

// Compile both patterns once, then report where each one occurs in the sample text.
const automaton = createAhoCorasick({ patterns: ['abra', 'cad'] });
const ahoCorasickMatches = automaton.search('abracadabra');
console.log('Aho–Corasick matches in abracadabra:', ahoCorasickMatches);
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
{
"name": "bundle",
"path": "dist/index.js",
"limit": "40 KB"
"limit": "41 KB"
}
]
}
6 changes: 6 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ export const examples = {
buildSuffixArray: 'examples/search.ts',
longestCommonSubsequence: 'examples/search.ts',
diffStrings: 'examples/search.ts',
createAhoCorasick: 'examples/search.ts',
},
data: {
diff: 'examples/jsonDiff.ts',
Expand Down Expand Up @@ -924,6 +925,11 @@ export type { LCSOptions, LCSResult, DiffOp } from './search/lcs.js';
*/
export { levenshteinDistance } from './search/levenshtein.js';

/**
 * Aho–Corasick multi-pattern automaton.
 * The option and automaton types are re-exported alongside the factory so
 * consumers can annotate their own code from the package entry point.
 */
export { createAhoCorasick } from './search/ahoCorasick.js';
export type { AhoBuildOptions, AhoAutomaton } from './search/ahoCorasick.js';

// ============================================================================
// 📊 DATA PROCESSING
// ============================================================================
Expand Down
106 changes: 106 additions & 0 deletions src/search/ahoCorasick.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/** Options accepted by `createAhoCorasick`. */
export interface AhoBuildOptions {
  /** Patterns to search for; must be a non-empty array of strings. */
  patterns: ReadonlyArray<string>;
  /** Set to `false` to lower-case patterns and text before matching. Defaults to `true`. */
  caseSensitive?: boolean;
}

/** Matcher compiled once from a fixed set of patterns. */
export interface AhoAutomaton {
  /**
   * Finds every (possibly overlapping) occurrence of every pattern in `text`.
   *
   * @param text - The text to scan.
   * @returns An object keyed by each pattern exactly as it was supplied. Each
   *   value lists the UTF-16 start offsets of that pattern's matches in
   *   ascending order; patterns without a match map to `[]`, and an empty
   *   pattern maps to every offset from `0` through `text.length`.
   */
  search(text: string): Record<string, number[]>;
}

/** One state of the automaton. */
interface Node {
  /** Outgoing trie edges, keyed by a single UTF-16 code unit. */
  next: Map<string, number>;
  /** State to fall back to when no outgoing edge matches the next code unit. */
  fail: number;
  /** Indices into the pattern list of every pattern that ends at this state. */
  out: number[];
}

/**
 * Builds an Aho–Corasick automaton that locates many patterns in one pass
 * over a text.
 *
 * Matching works on UTF-16 code units, so reported offsets are ordinary
 * string indices and patterns containing astral characters (e.g. emoji) are
 * matched correctly. A pattern listed more than once is reported once.
 *
 * With `caseSensitive: false` both patterns and text are lower-cased via
 * `String.prototype.toLowerCase`; offsets then refer to the lower-cased text,
 * which differs in length from the input for a few characters such as `'İ'`.
 *
 * @param options - Patterns plus optional case sensitivity.
 * @returns A reusable automaton exposing `search`.
 * @throws Error if `patterns` is not an array or is empty.
 * @throws TypeError if any pattern is not a string.
 */
export function createAhoCorasick(options: AhoBuildOptions): AhoAutomaton {
  validateOptions(options);
  const caseSensitive = options.caseSensitive ?? true;
  const originalPatterns = options.patterns.slice();
  const normalizedPatterns = caseSensitive
    ? originalPatterns
    : originalPatterns.map((p) => p.toLowerCase());

  const nodes: Node[] = [{ next: new Map(), fail: 0, out: [] }];

  // Build the trie. Edges are keyed by UTF-16 code unit — the same unit the
  // scan below consumes — so surrogate pairs are walked as two edges.
  const registered = new Set<string>();
  normalizedPatterns.forEach((pattern, idx) => {
    const original = originalPatterns[idx];
    // A repeated pattern shares its result key with the first occurrence;
    // registering it again would report every offset twice.
    if (registered.has(original)) return;
    registered.add(original);
    // Empty patterns never enter the trie; `search` handles them separately.
    if (pattern.length === 0) return;

    let state = 0;
    for (let i = 0; i < pattern.length; i += 1) {
      const unit = pattern.charAt(i);
      let to = nodes[state].next.get(unit);
      if (to === undefined) {
        to = nodes.length;
        nodes[state].next.set(unit, to);
        nodes.push({ next: new Map(), fail: 0, out: [] });
      }
      state = to;
    }
    nodes[state].out.push(idx);
  });

  // Build fail links breadth-first. Depth-1 states keep their initial fail
  // link of 0. The queue is walked with a cursor instead of `shift()` so the
  // traversal stays linear in the number of states.
  const queue: number[] = [...nodes[0].next.values()];
  for (let head = 0; head < queue.length; head += 1) {
    const v = queue[head];
    for (const [unit, to] of nodes[v].next) {
      queue.push(to);
      let f = nodes[v].fail;
      while (f !== 0 && !nodes[f].next.has(unit)) {
        f = nodes[f].fail;
      }
      const fail = nodes[f].next.get(unit) ?? 0;
      nodes[to].fail = fail;
      // The fail target is shallower, so its output list is already complete.
      for (const patIdx of nodes[fail].out) {
        nodes[to].out.push(patIdx);
      }
    }
  }

  function search(text: string): Record<string, number[]> {
    const t = caseSensitive ? text : text.toLowerCase();
    // Collected in a Map so pattern names such as 'constructor' or
    // '__proto__' cannot collide with inherited object properties.
    const found = new Map<string, number[]>();

    // An empty pattern matches at every position, including the end.
    for (let i = 0; i < originalPatterns.length; i += 1) {
      if (normalizedPatterns[i].length === 0) {
        found.set(
          originalPatterns[i],
          Array.from({ length: text.length + 1 }, (_, p) => p),
        );
      }
    }

    let state = 0;
    for (let i = 0; i < t.length; i += 1) {
      const unit = t.charAt(i);
      while (state !== 0 && !nodes[state].next.has(unit)) {
        state = nodes[state].fail;
      }
      state = nodes[state].next.get(unit) ?? state;

      for (const patIdx of nodes[state].out) {
        const pat = originalPatterns[patIdx];
        // `i` is the last code unit of the match; convert to its start offset.
        const pos = i - normalizedPatterns[patIdx].length + 1;
        let hits = found.get(pat);
        if (hits === undefined) {
          hits = [];
          found.set(pat, hits);
        }
        hits.push(pos);
      }
    }

    // Every requested pattern gets an entry, even without matches.
    for (const pat of originalPatterns) {
      if (!found.has(pat)) found.set(pat, []);
    }
    // fromEntries defines own data properties, so any key is stored safely.
    return Object.fromEntries(found);
  }

  return { search };
}

/**
 * Rejects option objects the automaton cannot be built from.
 *
 * @throws Error if `patterns` is not an array or is empty.
 * @throws TypeError if any pattern is not a string.
 */
function validateOptions(options: AhoBuildOptions): void {
  if (!Array.isArray(options.patterns) || options.patterns.length === 0) {
    throw new Error('patterns must contain at least one pattern.');
  }
  for (const pattern of options.patterns as ReadonlyArray<unknown>) {
    if (typeof pattern !== 'string') {
      throw new TypeError('patterns must all be strings.');
    }
  }
}
22 changes: 22 additions & 0 deletions tests/ahoCorasick.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { describe, expect, it } from 'vitest';

import { createAhoCorasick } from '../src/index.js';

describe('createAhoCorasick', () => {
  it('finds overlapping multi-pattern matches', () => {
    // 'abc' at offset 2 overlaps both 'ab' at 2 and 'bc' at 3.
    const matcher = createAhoCorasick({ patterns: ['ab', 'bc', 'abc'] });
    const { ab, bc, abc } = matcher.search('ababc');
    expect(ab).toEqual([0, 2]);
    expect(bc).toEqual([3]);
    expect(abc).toEqual([2]);
  });

  it('supports case-insensitive matching and empty patterns', () => {
    const matcher = createAhoCorasick({
      caseSensitive: false,
      patterns: ['He', 'eL', ''],
    });
    const hits = matcher.search('HeLlo');
    // Results stay keyed by the patterns as supplied, not their lower-cased form.
    expect(hits.He).toEqual([0]);
    expect(hits.eL).toEqual([1]);
    // The empty pattern matches at every offset, including the end of the text.
    expect(hits['']).toEqual([0, 1, 2, 3, 4, 5]);
  });
});

1 change: 1 addition & 0 deletions tests/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ describe('package entry point', () => {
| 'buildSuffixArray'
| 'longestCommonSubsequence'
| 'diffStrings'
| 'createAhoCorasick'
>();

expectTypeOf<ExampleName<'gameplay'>>().toEqualTypeOf<
Expand Down