diff --git a/ROADMAP.md b/ROADMAP.md index 559b9e6..d20bd3d 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -109,7 +109,7 @@ - [x] Segment tree range query helper **Compression & encoding** - [x] Run-length encoding (RLE) - - [ ] Huffman coding utilities + - [x] Huffman coding utilities - [ ] LZ77 dictionary compression helper - [x] Base64 encode/decode utilities **Geometric & numeric utilities** diff --git a/docs/index.d.ts b/docs/index.d.ts index 66d3269..17026d3 100644 --- a/docs/index.d.ts +++ b/docs/index.d.ts @@ -109,6 +109,9 @@ export const examples: { readonly runLengthDecode: 'examples/rle.ts'; readonly base64Encode: 'examples/base64.ts'; readonly base64Decode: 'examples/base64.ts'; + readonly createHuffmanTable: 'examples/huffman.ts'; + readonly huffmanEncode: 'examples/huffman.ts'; + readonly huffmanDecode: 'examples/huffman.ts'; }; readonly performance: { readonly debounce: 'examples/requestDedup.ts'; @@ -3072,6 +3075,17 @@ export function runLengthDecode(pairs: ReadonlyArray): string; export function base64Encode(input: string | Uint8Array): string; export function base64Decode(b64: string): Uint8Array; +/** + * Huffman coding utilities for entropy compression. + * Use for: building compact prefix codes for textual payloads. + * Import: data/huffman.ts + */ +export interface HuffmanEncodedResult { bitString: string; table: HuffmanTable } +export type HuffmanTable = Record; +export function createHuffmanTable(input: string): HuffmanTable; +export function huffmanEncode(input: string): HuffmanEncodedResult; +export function huffmanDecode(encoded: string, table: Readonly): string; + /** * Disjoint Set Union (Union-Find) with path compression and union by size. * Use for: connectivity queries, Kruskal MST, clustering. 
/**
 * Node of the Huffman tree.
 *
 * Leaves carry `char` and no children; internal nodes carry `left`/`right`
 * and a `freq` equal to the sum of their children's frequencies.
 */
interface HuffmanNode {
  char?: string;
  freq: number;
  left?: HuffmanNode;
  right?: HuffmanNode;
}

/**
 * Maps each symbol (one Unicode code point of the input) to its prefix code,
 * written as a string made only of the characters '0' and '1'.
 */
export type HuffmanTable = Record<string, string>;

/** A usable code is a non-empty run of binary digits. */
const BINARY_CODE_PATTERN = /^[01]+$/;

/**
 * Counts how often each code point occurs in `input`.
 *
 * Iterating the string with `for...of` walks code points rather than UTF-16
 * units, so astral symbols are counted as one symbol each.
 */
function buildFrequencyMap(input: string): Map<string, number> {
  const freq = new Map<string, number>();
  for (const char of input) {
    freq.set(char, (freq.get(char) ?? 0) + 1);
  }
  return freq;
}

/**
 * Builds the Huffman tree for the given symbol frequencies.
 *
 * @returns the root node, the lone leaf when there is a single symbol, or
 *   `null` when `freq` is empty.
 */
function buildTree(freq: Map<string, number>): HuffmanNode | null {
  const queue: HuffmanNode[] = Array.from(freq, ([char, count]) => ({
    char,
    freq: count,
  }));

  // Sort once. Array.prototype.sort is stable, so symbols with equal
  // frequency keep their first-seen order, which keeps the table deterministic.
  queue.sort((a, b) => a.freq - b.freq);

  while (queue.length > 1) {
    const left = queue.shift();
    const right = queue.shift();
    if (left === undefined || right === undefined) {
      break; // Unreachable: the loop condition guarantees two nodes.
    }
    const merged: HuffmanNode = { freq: left.freq + right.freq, left, right };

    // Insert after every node whose frequency is <= the merged frequency.
    // This is exactly where a stable re-sort of [...queue, merged] would put
    // it, so the tree shape matches a sort-per-iteration implementation.
    const insertAt = queue.findIndex((node) => node.freq > merged.freq);
    if (insertAt === -1) {
      queue.push(merged);
    } else {
      queue.splice(insertAt, 0, merged);
    }
  }

  return queue[0] ?? null;
}

/**
 * Walks the tree and assigns '0' to every left edge and '1' to every right
 * edge, collecting the resulting code for each leaf.
 *
 * @throws Error if a leaf has no symbol attached.
 */
function generateCodes(node: HuffmanNode | null): HuffmanTable {
  const codes: HuffmanTable = {};
  if (!node) {
    return codes;
  }

  const traverse = (current: HuffmanNode, prefix: string): void => {
    if (!current.left && !current.right) {
      if (current.char === undefined) {
        throw new Error('Huffman leaf node missing symbol.');
      }
      // A tree with a single symbol has an empty path to its only leaf;
      // give it the one-bit code '0' so the encoded output is not empty.
      codes[current.char] = prefix.length === 0 ? '0' : prefix;
      return;
    }
    if (current.left) traverse(current.left, `${prefix}0`);
    if (current.right) traverse(current.right, `${prefix}1`);
  };

  traverse(node, '');
  return codes;
}

/**
 * Builds the Huffman code table for `input`.
 *
 * @param input text whose code points become the table's symbols.
 * @returns a table with one entry per distinct code point; empty for `''`.
 * @throws TypeError if `input` is not a string.
 */
export function createHuffmanTable(input: string): HuffmanTable {
  if (typeof input !== 'string') {
    throw new TypeError('input must be a string.');
  }
  const tree = buildTree(buildFrequencyMap(input));
  return generateCodes(tree);
}

/** Result of {@link huffmanEncode}: the bit string plus the table needed to decode it. */
export interface HuffmanEncodedResult {
  bitString: string;
  table: HuffmanTable;
}

/**
 * Encodes `input` with a Huffman table derived from `input` itself.
 *
 * @param input text to encode.
 * @returns the concatenated codes as a '0'/'1' string and the table used;
 *   `{ bitString: '', table: {} }` for the empty string.
 * @throws TypeError if `input` is not a string.
 * @throws Error if a symbol unexpectedly has no code in the generated table.
 */
export function huffmanEncode(input: string): HuffmanEncodedResult {
  if (typeof input !== 'string') {
    throw new TypeError('input must be a string.');
  }
  if (input.length === 0) {
    return { bitString: '', table: {} };
  }

  const table = createHuffmanTable(input);
  let bitString = '';
  for (const char of input) {
    const code = table[char];
    if (!code) {
      throw new Error(`Missing Huffman code for character: ${char}`);
    }
    bitString += code;
  }
  return { bitString, table };
}

/**
 * Decodes a '0'/'1' string produced with the given table.
 *
 * @param encoded bit string; `''` decodes to `''` without inspecting `table`.
 * @param table symbol-to-code table, e.g. from {@link createHuffmanTable}.
 * @returns the decoded text.
 * @throws TypeError if `encoded` is not a string.
 * @throws Error if the table is missing or empty, contains an empty,
 *   non-string or non-binary code, or assigns one code to two symbols;
 *   if `encoded` contains a character other than '0'/'1'; or if `encoded`
 *   ends in the middle of a code.
 */
export function huffmanDecode(encoded: string, table: Readonly<HuffmanTable>): string {
  if (typeof encoded !== 'string') {
    throw new TypeError('encoded must be a string of bits.');
  }

  if (encoded.length === 0) {
    return '';
  }

  if (!table || Object.keys(table).length === 0) {
    throw new Error('table must contain at least one Huffman code.');
  }

  const reverse = new Map<string, string>();
  for (const [char, code] of Object.entries(table)) {
    if (typeof code !== 'string' || !BINARY_CODE_PATTERN.test(code)) {
      throw new Error(`Invalid Huffman code for character: ${char}`);
    }
    // Two symbols sharing a code make decoding ambiguous; refuse the table
    // instead of silently letting the later entry win.
    const previous = reverse.get(code);
    if (previous !== undefined) {
      throw new Error(
        `Duplicate Huffman code "${code}" for characters: ${previous} and ${char}`,
      );
    }
    reverse.set(code, char);
  }

  let current = '';
  let result = '';
  for (const bit of encoded) {
    if (bit !== '0' && bit !== '1') {
      throw new Error('Encoded string must contain only 0 or 1 characters.');
    }
    current += bit;
    const char = reverse.get(current);
    if (char !== undefined) {
      result += char;
      current = '';
    }
  }

  // Leftover bits mean the input was truncated or does not match the table.
  if (current.length > 0) {
    throw new Error('Encoded string ended with an incomplete Huffman code.');
  }

  return result;
}

/** Internal helpers exposed for unit tests only. */
export const __internals = { buildFrequencyMap, buildTree, generateCodes };
it('validates encoded data and tables', () => { + const table = createHuffmanTable('abc'); + expect(() => huffmanDecode('abc', table)).toThrow('0 or 1'); + expect(() => huffmanDecode('0101', {})).toThrow('at least one'); + const bitString = 'ab'.split('').map((char) => table[char] ?? '').join(''); + expect(() => huffmanDecode(`${bitString}1`, table)).toThrow('incomplete'); + }); +}); diff --git a/tests/index.test.ts b/tests/index.test.ts index 7a71f32..4672365 100644 --- a/tests/index.test.ts +++ b/tests/index.test.ts @@ -27,6 +27,9 @@ describe('package entry point', () => { expect(examples.data.applyJsonDiffSelective).toBe('examples/jsonDiff.ts'); expect(examples.data.diffTree).toBe('examples/treeDiff.ts'); expect(examples.data.applyTreeDiff).toBe('examples/treeDiff.ts'); + expect(examples.data.createHuffmanTable).toBe('examples/huffman.ts'); + expect(examples.data.huffmanEncode).toBe('examples/huffman.ts'); + expect(examples.data.huffmanDecode).toBe('examples/huffman.ts'); expect(examples.visual.hexToRgb).toBe('examples/color.ts'); expect(examples.visual.rgbToHex).toBe('examples/color.ts'); expect(examples.visual.rgbToHsl).toBe('examples/color.ts'); @@ -136,6 +139,9 @@ describe('package entry point', () => { | 'runLengthDecode' | 'base64Encode' | 'base64Decode' + | 'createHuffmanTable' + | 'huffmanEncode' + | 'huffmanDecode' >(); expectTypeOf>().toEqualTypeOf<