diff --git a/ROADMAP.md b/ROADMAP.md
index d20bd3d..391840b 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -110,7 +110,7 @@
 **Compression & encoding**
   - [x] Run-length encoding (RLE)
   - [x] Huffman coding utilities
-  - [ ] LZ77 dictionary compression helper
+  - [x] LZ77 dictionary compression helper
   - [x] Base64 encode/decode utilities
 **Geometric & numeric utilities**
   - [ ] Closest pair of points solver for geometry toolkit
diff --git a/docs/index.d.ts b/docs/index.d.ts
index 17026d3..7b35926 100644
--- a/docs/index.d.ts
+++ b/docs/index.d.ts
@@ -112,6 +112,8 @@ export const examples: {
     readonly createHuffmanTable: 'examples/huffman.ts';
     readonly huffmanEncode: 'examples/huffman.ts';
     readonly huffmanDecode: 'examples/huffman.ts';
+    readonly lz77Compress: 'examples/lz77.ts';
+    readonly lz77Decompress: 'examples/lz77.ts';
   };
   readonly performance: {
     readonly debounce: 'examples/requestDedup.ts';
@@ -3086,6 +3088,16 @@ export function createHuffmanTable(input: string): HuffmanTable;
 export function huffmanEncode(input: string): HuffmanEncodedResult;
 export function huffmanDecode(encoded: string, table: Readonly): string;
 
+/**
+ * LZ77 dictionary compression helper.
+ * Use for: compressing strings into tokens with sliding-window back-references.
+ * Import: data/lz77.ts
+ */
+export interface Lz77Token { offset: number; length: number; next: string }
+export interface Lz77Options { windowSize?: number; lookaheadSize?: number }
+export function lz77Compress(input: string, options?: Lz77Options): Lz77Token[];
+export function lz77Decompress(tokens: ReadonlyArray<Lz77Token>): string;
+
 /**
  * Disjoint Set Union (Union-Find) with path compression and union by size.
  * Use for: connectivity queries, Kruskal MST, clustering.
diff --git a/examples/lz77.ts b/examples/lz77.ts
new file mode 100644
index 0000000..4f8be8a
--- /dev/null
+++ b/examples/lz77.ts
@@ -0,0 +1,6 @@
+import { lz77Compress, lz77Decompress } from '../src/index.js';
+
+const input = 'abracadabra abracadabra';
+const tokens = lz77Compress(input, { windowSize: 12, lookaheadSize: 8 });
+console.log(tokens);
+console.log(lz77Decompress(tokens));
diff --git a/src/data/lz77.ts b/src/data/lz77.ts
new file mode 100644
index 0000000..7335d0f
--- /dev/null
+++ b/src/data/lz77.ts
@@ -0,0 +1,117 @@
+/** A single LZ77 token: copy `length` code units from `offset` back, then append `next`. */
+export interface Lz77Token {
+  /** Distance back from the end of the decoded output; 0 when nothing is copied. */
+  offset: number;
+  /** Number of UTF-16 code units to copy; may exceed `offset` for overlapping runs. */
+  length: number;
+  /** Literal appended after the copy; '' only when a match runs to the end of the input. */
+  next: string;
+}
+
+/** Tuning options for {@link lz77Compress}. */
+export interface Lz77Options {
+  /** How far back a match may start, in code units (default 32, minimum 1). */
+  windowSize?: number;
+  /** Longest match a single token may encode (default 16, minimum 1). */
+  lookaheadSize?: number;
+}
+
+const DEFAULT_WINDOW_SIZE = 32;
+const DEFAULT_LOOKAHEAD_SIZE = 16;
+
+/** Floors `value` and clamps it to at least 1; a missing or NaN value yields `fallback`. */
+function normalizeSize(value: number | undefined, fallback: number): number {
+  const floored = Math.floor(value ?? fallback);
+  return Number.isNaN(floored) ? fallback : Math.max(1, floored);
+}
+
+/**
+ * Encodes `input` as LZ77 tokens using a greedy longest-match search.
+ *
+ * Positions are UTF-16 code units. Ties between equally long matches go to the
+ * smallest offset, and matches may overlap the current position (run encoding).
+ *
+ * @param input - Text to compress.
+ * @param options - Window and lookahead sizes; each is floored and clamped to >= 1.
+ * @returns Tokens that {@link lz77Decompress} turns back into `input`.
+ * @throws TypeError if `input` is not a string.
+ */
+export function lz77Compress(input: string, options: Lz77Options = {}): Lz77Token[] {
+  if (typeof input !== 'string') {
+    throw new TypeError('input must be a string.');
+  }
+  const windowSize = normalizeSize(options.windowSize, DEFAULT_WINDOW_SIZE);
+  const lookaheadSize = normalizeSize(options.lookaheadSize, DEFAULT_LOOKAHEAD_SIZE);
+  const tokens: Lz77Token[] = [];
+  let position = 0;
+
+  while (position < input.length) {
+    const maxOffset = Math.min(windowSize, position);
+    const maxLength = Math.min(lookaheadSize, input.length - position);
+    let bestOffset = 0;
+    let bestLength = 0;
+
+    // Stop scanning once a match fills the lookahead: no later offset can beat it.
+    for (let offset = 1; offset <= maxOffset && bestLength < maxLength; offset += 1) {
+      const start = position - offset;
+      let matchLength = 0;
+      // `% offset` wraps the source index so a match may run past `position`.
+      while (
+        matchLength < maxLength &&
+        input[start + (matchLength % offset)] === input[position + matchLength]
+      ) {
+        matchLength += 1;
+      }
+      if (matchLength > bestLength) {
+        bestLength = matchLength;
+        bestOffset = offset;
+      }
+    }
+
+    // `next` is '' only when the match consumed the rest of the input.
+    const nextChar = input[position + bestLength] ?? '';
+    tokens.push({ offset: bestOffset, length: bestLength, next: nextChar });
+    position += bestLength + 1;
+  }
+
+  return tokens;
+}
+
+/**
+ * Rebuilds the original string from LZ77 tokens.
+ *
+ * A token whose `offset` or `length` is 0 contributes only its `next` literal;
+ * null/undefined entries are skipped.
+ *
+ * @param tokens - Tokens as produced by {@link lz77Compress}.
+ * @returns The decoded string.
+ * @throws Error if an offset or length is negative, or if a copying token has a
+ *   non-integer offset/length or an offset reaching past the start of the output.
+ */
+export function lz77Decompress(tokens: ReadonlyArray<Lz77Token>): string {
+  let output = '';
+  for (const token of tokens) {
+    if (!token) continue;
+    const { offset, length, next } = token;
+    if (offset < 0 || length < 0) {
+      throw new Error('offset and length must be non-negative.');
+    }
+    if (offset !== 0 && length !== 0) {
+      if (offset > output.length) {
+        throw new Error('offset exceeds output length.');
+      }
+      if (!Number.isInteger(offset) || !Number.isInteger(length)) {
+        throw new Error('offset and length must be integers.');
+      }
+      // Repeating the tail handles overlapping copies where length > offset.
+      const pattern = output.slice(output.length - offset);
+      output += pattern.repeat(Math.ceil(length / offset)).slice(0, length);
+    }
+    if (next) {
+      output += next;
+    }
+  }
+  return output;
+}
+
+export const __internals = { lz77Compress, lz77Decompress };
diff --git a/src/index.ts b/src/index.ts
index 49dd21c..428acf2 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -110,6 +110,8 @@ export const examples = {
     createHuffmanTable: 'examples/huffman.ts',
     huffmanEncode: 'examples/huffman.ts',
     huffmanDecode: 'examples/huffman.ts',
+    lz77Compress: 'examples/lz77.ts',
+    lz77Decompress: 'examples/lz77.ts',
   },
   performance: {
     debounce: 'examples/requestDedup.ts',
@@ -1096,6 +1098,13 @@ export { base64Encode, base64Decode } from './data/base64.js';
  */
 export { createHuffmanTable, huffmanEncode, huffmanDecode } from './data/huffman.js';
 export type { HuffmanTable, HuffmanEncodedResult } from './data/huffman.js';
+/**
+ * LZ77 dictionary compression helpers for sliding window encoding.
+ *
+ * Example file: examples/lz77.ts
+ */
+export { lz77Compress, lz77Decompress } from './data/lz77.js';
+export type { Lz77Token, Lz77Options } from './data/lz77.js';
 
 export type {
   TreeNode,
diff --git a/tests/index.test.ts b/tests/index.test.ts
index 4672365..e170ffc 100644
--- a/tests/index.test.ts
+++ b/tests/index.test.ts
@@ -30,6 +30,8 @@ describe('package entry point', () => {
     expect(examples.data.createHuffmanTable).toBe('examples/huffman.ts');
     expect(examples.data.huffmanEncode).toBe('examples/huffman.ts');
     expect(examples.data.huffmanDecode).toBe('examples/huffman.ts');
+    expect(examples.data.lz77Compress).toBe('examples/lz77.ts');
+    expect(examples.data.lz77Decompress).toBe('examples/lz77.ts');
     expect(examples.visual.hexToRgb).toBe('examples/color.ts');
     expect(examples.visual.rgbToHex).toBe('examples/color.ts');
     expect(examples.visual.rgbToHsl).toBe('examples/color.ts');
@@ -142,6 +144,8 @@ describe('package entry point', () => {
       | 'createHuffmanTable'
       | 'huffmanEncode'
       | 'huffmanDecode'
+      | 'lz77Compress'
+      | 'lz77Decompress'
     >();
 
     expectTypeOf>().toEqualTypeOf<
diff --git a/tests/lz77.test.ts b/tests/lz77.test.ts
new file mode 100644
index 0000000..0231e22
--- /dev/null
+++ b/tests/lz77.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it } from 'vitest';
+
+import { lz77Compress, lz77Decompress } from '../src/data/lz77.js';
+
+describe('LZ77 compression', () => {
+  it('round-trips a string with repeated sequences', () => {
+    const input = 'abracadabra abracadabra';
+    const tokens = lz77Compress(input, { windowSize: 12, lookaheadSize: 8 });
+    const decoded = lz77Decompress(tokens);
+    expect(decoded).toBe(input);
+  });
+
+  it('compresses highly repetitive data', () => {
+    const input = 'aaaaaaaaaaaaaaaabbbbbbbbbbbb';
+    const tokens = lz77Compress(input, { windowSize: 16 });
+    expect(tokens.length).toBeLessThan(input.length);
+    expect(lz77Decompress(tokens)).toBe(input);
+  });
+
+  it('round-trips empty and single-character input', () => {
+    expect(lz77Compress('')).toEqual([]);
+    expect(lz77Decompress([])).toBe('');
+    expect(lz77Compress('a')).toEqual([{ offset: 0, length: 0, next: 'a' }]);
+    expect(lz77Decompress(lz77Compress('a'))).toBe('a');
+  });
+
+  it('encodes runs with overlapping back-references', () => {
+    const tokens = lz77Compress('aaaa');
+    expect(tokens).toEqual([
+      { offset: 0, length: 0, next: 'a' },
+      { offset: 1, length: 3, next: '' },
+    ]);
+    expect(lz77Decompress(tokens)).toBe('aaaa');
+  });
+
+  it('validates tokens during decompression', () => {
+    expect(() =>
+      lz77Decompress([
+        { offset: 5, length: 2, next: 'a' },
+      ])
+    ).toThrow('offset exceeds');
+    expect(() => lz77Decompress([{ offset: -1, length: 1, next: 'a' }])).toThrow('non-negative');
+  });
+
+  it('rejects non-string input', () => {
+    // @ts-expect-error deliberately passing a non-string
+    expect(() => lz77Compress(42)).toThrow(TypeError);
+  });
+});