From 2a9af4a2d92fab0491d80577dc98b2b327d37798 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 13:36:41 +0200
Subject: [PATCH 01/17] feat: add string-based API methods for validation,
links, and minification
Introduce `checkCodeString`, `checkLinksString`, and `minifyString` to the API, enabling string-based pipelines for validation, link checking, and minification. Update documentation, type definitions, and tests to cover the new methods.
(This commit message was AI-generated.)
Signed-off-by: Jens Oliver Meiert
---
CHANGELOG.md | 8 +++
README.md | 36 ++++++++++++-
bin/hihtml.test.js | 100 ++++++++++++++++++++++++++++++++++--
package-lock.json | 4 +-
package.json | 2 +-
src/adapters/check-code.js | 12 +++++
src/adapters/check-links.js | 19 +++++++
src/adapters/minify.js | 37 ++++++++++---
src/index.d.ts | 22 ++++++++
src/index.js | 6 +--
src/index.types.ts | 8 ++-
11 files changed, 235 insertions(+), 19 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 49d57b9..11e0f54 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,14 @@ All notable changes to HiHTML are documented in this file, which is (mostly) AI-
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [1.3.0-beta] - 2026-05-13
+
+### Added
+
+* Added `checkCodeString(content, options?)` to the programmatic API: validates an HTML string and checks it for deprecated markup, mirroring `checkCode` for string-based pipelines
+* Added `checkLinksString(content, options?)`: checks all external http/https URLs found in an HTML string, mirroring `checkLinks` for string-based pipelines
+* Added `minifyString(content, options?)`: minifies an HTML string and returns it, without any file I/O—useful in content-pipeline contexts such as Eleventy transforms, middleware, and SSR handlers
+
## [1.2.0-beta] - 2026-05-11
### Added
diff --git a/README.md b/README.md
index 36a9038..7a25518 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,7 @@ npx hihtml -q -a -i src -o dist
### 2. Programmatic API
```js
-import { checkCode, checkLinks, minify, collect } from 'hihtml';
+import { checkCode, checkCodeString, checkLinks, checkLinksString, minify, minifyString, collect } from 'hihtml';
const files = await collect('./src');
@@ -115,6 +115,11 @@ const links = await checkLinks(files);
const minification = await minify(files, files); // in-place
// { files: [{ path, sizeOriginal, sizeMinified }], saved }
+
+// String variants—same result types, no file I/O
+const minified = await minifyString('Hello world
');
+const codeGate = await checkCodeString('Nope
');
+const linksCleaned = await checkLinksString('Example');
```
#### `collect(dir, extensions?, excludedDirs?)`
@@ -131,6 +136,15 @@ Validates HTML files and checks for deprecated markup. Returns `Promise`—same shape as `checkCode`. Useful in content-pipeline contexts (Eleventy transforms, middleware, SSR) where HTML is available as a string rather than a file.
+
+* `options.preset`: HTML-validate preset name (default: `'standard'`)
+* `options.ignore`: List of HTML-validate rule IDs to suppress (default: `[]`)
+
+Note: `result.validation.files[0].path` and `result.deprecation.files[0].path` will be `'(string input)'`, not a real file path.
+
#### `checkLinks(filePaths, options?)`
Checks all external http/https URLs (`href`, `src`, `srcset`, `action` attributes) found in the given HTML files. Each unique URL is checked once; results are mapped back to every file it appears in. Returns `Promise`.
@@ -142,6 +156,19 @@ Checks all external http/https URLs (`href`, `src`, `srcset`, `action` attribute
Links are checked via HEAD request, falling back to GET on 405. 4xx and 5xx responses are reported as broken. Skipped URLs (from the ignore list) appear in results with `skipped: true` and are never counted as broken.
+#### `checkLinksString(content, options?)`
+
+Checks all external http/https URLs found in an HTML string. Returns `Promise`—same shape as `checkLinks`. Useful when HTML is available as a string rather than a file, e.g., to check links in a fetched document or API response.
+
+* `options.timeout`: Request timeout in milliseconds (default: `10000`)
+* `options.concurrency`: Maximum concurrent requests (default: `8`)
+* `options.warnOnPermanentRedirects`: Warn on 301/308 permanent redirects (default: `false`)
+* `options.ignore`: List of hostnames or URL prefixes to skip (default: `[]`)
+* `options.onProgress`: Called after each URL is checked
+* `options.onStart`: Called once with the total number of URLs to check
+
+Note: `result.files[0].path` will be `'(string input)'`, not a real file path. `result.countFileErrors` will always be `0`.
+
#### `minify(filePaths, outputPaths, options?)`
Minifies HTML files using HTML Minifier Next. Returns `Promise`.
@@ -150,6 +177,13 @@ Minifies HTML files using HTML Minifier Next. Returns `Promise`. Useful in content-pipeline contexts (Eleventy transforms, middleware, SSR) where HTML is available as a string rather than a file.
+
+* `options.preset`: HTML Minifier Next preset name (default: `'comprehensive'`)
+* `options.options`: Additional HTML Minifier Next options to merge with the preset
+
#### `loadConfig(cwd?, filePath?)`
Loads HiHTML configuration. When `filePath` is given, only that file is read (no CWD fallback); if it contains a `"hihtml"` key that value is used, otherwise the root object is used. Without `filePath`, reads `.hihtml.json` or the `"hihtml"` key in `package.json` from `cwd`. Returns `Promise`.
diff --git a/bin/hihtml.test.js b/bin/hihtml.test.js
index 772540a..0be1cb3 100644
--- a/bin/hihtml.test.js
+++ b/bin/hihtml.test.js
@@ -8,9 +8,9 @@ import assert from 'node:assert';
import { stripVTControlCharacters } from 'node:util';
import { validate } from '../src/adapters/validate.js';
-import { checkCode } from '../src/adapters/check-code.js';
-import { checkLinks } from '../src/adapters/check-links.js';
-import { minify } from '../src/adapters/minify.js';
+import { checkCode, checkCodeString } from '../src/adapters/check-code.js';
+import { checkLinks, checkLinksString } from '../src/adapters/check-links.js';
+import { minify, minifyString } from '../src/adapters/minify.js';
import { collect, read } from '../src/lib/files.js';
import { loadConfig } from '../src/lib/config.js';
@@ -682,6 +682,43 @@ describe('Check code', () => {
});
});
+// Programmatic API: `checkCodeString`
+
+describe('Check code string', () => {
+ test('Returns expected result shape', async () => {
+ const result = await checkCodeString(CLEAN_HTML);
+ assert.ok('validation' in result);
+ assert.ok('deprecation' in result);
+ assert.ok('countErrors' in result.validation);
+ assert.ok('countIssues' in result.deprecation);
+ });
+
+ test('Clean HTML reports no issues', async () => {
+ const result = await checkCodeString(CLEAN_HTML);
+ assert.strictEqual(result.validation.countErrors, 0);
+ assert.strictEqual(result.deprecation.countIssues, 0);
+ });
+
+ test('Detects deprecated markup', async () => {
+ const result = await checkCodeString(DEPRECATED_HTML);
+ assert.ok(result.deprecation.countIssues > 0);
+ assert.ok(result.deprecation.files[0].elements.includes('center'));
+ });
+
+ test('Detects validation errors', async () => {
+ const result = await checkCodeString(INVALID_HTML);
+ assert.ok(result.validation.countErrors > 0);
+ });
+
+ test('Passes ignore list through to validation result', async () => {
+ const base = await checkCodeString(INVALID_HTML);
+ const ruleIds = [...new Set(base.validation.files[0].messages.map(m => m.ruleId))];
+ const result = await checkCodeString(INVALID_HTML, { ignore: ruleIds });
+ assert.strictEqual(result.validation.countErrors, 0);
+ assert.strictEqual(result.validation.countIgnored, base.validation.files[0].messages.length);
+ });
+});
+
// Programmatic API: `checkLinks`
describe('Check links', () => {
@@ -827,6 +864,38 @@ describe('Check links', () => {
});
});
+// Programmatic API: `checkLinksString`
+
+describe('Check links string', () => {
+ test('Returns expected result shape', async () => {
+ const result = await checkLinksString(`TOK`);
+ assert.ok('files' in result);
+ assert.ok('countBroken' in result);
+ assert.ok('countChecked' in result);
+ assert.ok(Array.isArray(result.files));
+ });
+
+ test('Reports ok for 200 response', async () => {
+ const result = await checkLinksString(`TOK`);
+ assert.strictEqual(result.countBroken, 0);
+ assert.strictEqual(result.countChecked, 1);
+ assert.strictEqual(result.files[0].links[0].ok, true);
+ });
+
+ test('Reports broken for 404 response', async () => {
+ const result = await checkLinksString(`TBroken`);
+ assert.strictEqual(result.countBroken, 1);
+ assert.strictEqual(result.files[0].links[0].ok, false);
+ });
+
+ test('No http/https links returns empty result', async () => {
+ const result = await checkLinksString(CLEAN_HTML);
+ assert.strictEqual(result.countBroken, 0);
+ assert.strictEqual(result.countChecked, 0);
+ assert.strictEqual(result.files[0].links.length, 0);
+ });
+});
+
// Programmatic API: `minify`
describe('Minify files', () => {
@@ -902,6 +971,31 @@ describe('Minify files', () => {
});
});
+// Programmatic API: `minifyString`
+
+describe('Minify string', () => {
+ test('Returns a string', async () => {
+ const result = await minifyString(CLEAN_HTML);
+ assert.strictEqual(typeof result, 'string');
+ });
+
+ test('Output is not larger than input', async () => {
+ const result = await minifyString(CLEAN_HTML);
+ assert.ok(Buffer.byteLength(result) <= Buffer.byteLength(CLEAN_HTML));
+ });
+
+ test('Collapses whitespace with default preset', async () => {
+ const result = await minifyString('T Hello world
');
+ assert.ok(!result.includes(' Hello'));
+ });
+
+ test('Respects options override', async () => {
+ const loose = 'T Hello world
';
+ const result = await minifyString(loose, { options: { collapseWhitespace: false } });
+ assert.ok(result.includes(' Hello'));
+ });
+});
+
// Programmatic API: `read`
describe('Read files', () => {
diff --git a/package-lock.json b/package-lock.json
index 0c89197..22e7303 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "hihtml",
- "version": "1.2.0-beta",
+ "version": "1.3.0-beta",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "hihtml",
- "version": "1.2.0-beta",
+ "version": "1.3.0-beta",
"license": "MIT",
"dependencies": {
"commander": "^14.0.3",
diff --git a/package.json b/package.json
index af6b183..1a8c4c4 100644
--- a/package.json
+++ b/package.json
@@ -49,5 +49,5 @@
},
"type": "module",
"types": "src/index.d.ts",
- "version": "1.2.0-beta"
+ "version": "1.3.0-beta"
}
diff --git a/src/adapters/check-code.js b/src/adapters/check-code.js
index 5e05e05..8645acd 100644
--- a/src/adapters/check-code.js
+++ b/src/adapters/check-code.js
@@ -69,3 +69,15 @@ export async function checkCode(filePaths, { preset = 'standard', ignore = [], c
]);
return { validation: validateResult, deprecation: deprecatedResult };
}
+
+const SYNTHETIC_PATH = '(string input)';
+
+/**
+ * Validate an HTML string and check for deprecated markup.
+ * @param {string} content
+ * @param {{ preset?: string, ignore?: string[] }} [options]
+ * @returns {Promise}
+ */
+export async function checkCodeString(content, { preset = 'standard', ignore = [] } = {}) {
+ return checkCode([SYNTHETIC_PATH], { preset, ignore, contents: new Map([[SYNTHETIC_PATH, content]]) });
+}
\ No newline at end of file
diff --git a/src/adapters/check-links.js b/src/adapters/check-links.js
index 5888884..048fdea 100644
--- a/src/adapters/check-links.js
+++ b/src/adapters/check-links.js
@@ -265,3 +265,22 @@ export async function checkLinks(filePaths, {
const countFileErrors = files.filter(f => f.error !== undefined).length;
return { files, countBroken, countChecked: toCheck.size, countSkipped, countFileErrors };
}
+
+const SYNTHETIC_PATH = '(string input)';
+
+/**
+ * Check all external http/https URLs found in an HTML string.
+ * @param {string} content
+ * @param {{
+ * concurrency?: number,
+ * timeout?: number,
+ * warnOnPermanentRedirects?: boolean,
+ * ignore?: string[],
+ * onProgress?: () => void,
+ * onStart?: (total: number) => void,
+ * }} [options]
+ * @returns {Promise}
+ */
+export async function checkLinksString(content, options = {}) {
+ return checkLinks([SYNTHETIC_PATH], { ...options, contents: new Map([[SYNTHETIC_PATH, content]]) });
+}
\ No newline at end of file
diff --git a/src/adapters/minify.js b/src/adapters/minify.js
index 5f536bb..d3e5621 100644
--- a/src/adapters/minify.js
+++ b/src/adapters/minify.js
@@ -17,13 +17,12 @@ import { DEFAULT_CONCURRENCY, runWithConcurrency } from '../lib/concurrency.js';
*/
/**
- * Minify HTML files using HTML Minifier Next.
- * @param {string[]} filePaths - Input file paths
- * @param {string[]} outputPaths - Output file paths (parallel to filePaths; same value = in-place)
- * @param {{ preset?: string, options?: Record, concurrency?: number, contents?: Map, onProgress?: () => void }} [opts]
- * @returns {Promise}
+ * Load HTML Minifier Next and resolve preset and extra options into a merged options object.
+ * @param {string} preset
+ * @param {Record} options
+ * @returns {Promise<{ htmlMinify: Function, resolvedOptions: Record }>}
*/
-export async function minify(filePaths, outputPaths, { preset = 'comprehensive', options = {}, concurrency = DEFAULT_CONCURRENCY, contents, onProgress } = {}) {
+async function loadMinifier(preset, options) {
let htmlMinify, getPreset;
try {
({ minify: htmlMinify, getPreset } = await import('html-minifier-next'));
@@ -38,7 +37,29 @@ export async function minify(filePaths, outputPaths, { preset = 'comprehensive',
throw new Error(`HTML Minifier Next API error—the package may have breaking changes: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
}
- const resolvedOptions = { ...presetOptions, ...options };
+ return { htmlMinify, resolvedOptions: { ...presetOptions, ...options } };
+}
+
+/**
+ * Minify an HTML string using HTML Minifier Next.
+ * @param {string} content
+ * @param {{ preset?: string, options?: Record }} [opts]
+ * @returns {Promise}
+ */
+export async function minifyString(content, { preset = 'comprehensive', options = {} } = {}) {
+ const { htmlMinify, resolvedOptions } = await loadMinifier(preset, options);
+ return htmlMinify(content, resolvedOptions);
+}
+
+/**
+ * Minify HTML files using HTML Minifier Next.
+ * @param {string[]} filePaths - Input file paths
+ * @param {string[]} outputPaths - Output file paths (parallel to filePaths; same value = in-place)
+ * @param {{ preset?: string, options?: Record, concurrency?: number, contents?: Map, onProgress?: () => void }} [opts]
+ * @returns {Promise}
+ */
+export async function minify(filePaths, outputPaths, { preset = 'comprehensive', options = {}, concurrency = DEFAULT_CONCURRENCY, contents, onProgress } = {}) {
+ const { htmlMinify, resolvedOptions } = await loadMinifier(preset, options);
if (outputPaths.length !== filePaths.length) {
throw new Error(`outputPaths length (${outputPaths.length}) must match filePaths length (${filePaths.length})`);
@@ -85,4 +106,4 @@ export async function minify(filePaths, outputPaths, { preset = 'comprehensive',
const saved = files.reduce((acc, f) => f.error ? acc : acc + Math.max(0, (f.sizeOriginal || 0) - (f.sizeMinified || 0)), 0);
return { files, saved };
-}
+}
\ No newline at end of file
diff --git a/src/index.d.ts b/src/index.d.ts
index 5188ece..056e5cc 100644
--- a/src/index.d.ts
+++ b/src/index.d.ts
@@ -107,6 +107,11 @@ export declare function checkCode(
options?: { preset?: string; ignore?: string[]; concurrency?: number; contents?: Map; onProgress?: () => void }
): Promise;
+export declare function checkCodeString(
+ content: string,
+ options?: { preset?: string; ignore?: string[] }
+): Promise;
+
export declare function checkLinks(
filePaths: string[],
options?: {
@@ -120,8 +125,25 @@ export declare function checkLinks(
}
): Promise;
+export declare function checkLinksString(
+ content: string,
+ options?: {
+ concurrency?: number;
+ timeout?: number;
+ warnOnPermanentRedirects?: boolean;
+ ignore?: string[];
+ onProgress?: () => void;
+ onStart?: (total: number) => void;
+ }
+): Promise;
+
export declare function minify(
filePaths: string[],
outputPaths: string[],
options?: { preset?: string; options?: Record; concurrency?: number; contents?: Map; onProgress?: () => void }
): Promise;
+
+export declare function minifyString(
+ content: string,
+ options?: { preset?: string; options?: Record }
+): Promise;
\ No newline at end of file
diff --git a/src/index.js b/src/index.js
index 67f5a5f..fd0a09c 100644
--- a/src/index.js
+++ b/src/index.js
@@ -1,5 +1,5 @@
-export { checkCode } from './adapters/check-code.js';
-export { checkLinks } from './adapters/check-links.js';
-export { minify } from './adapters/minify.js';
+export { checkCode, checkCodeString } from './adapters/check-code.js';
+export { checkLinks, checkLinksString } from './adapters/check-links.js';
+export { minify, minifyString } from './adapters/minify.js';
export { collect, read, HTML_EXTENSIONS, EXCLUDED_DIRS } from './lib/files.js';
export { loadConfig } from './lib/config.js';
diff --git a/src/index.types.ts b/src/index.types.ts
index c395c7d..f287c8e 100644
--- a/src/index.types.ts
+++ b/src/index.types.ts
@@ -11,8 +11,11 @@ import type {
import {
checkCode,
+ checkCodeString,
checkLinks,
+ checkLinksString,
minify,
+ minifyString,
collect,
read,
loadConfig,
@@ -33,11 +36,14 @@ export type {
export {
checkCode,
+ checkCodeString,
checkLinks,
+ checkLinksString,
minify,
+ minifyString,
collect,
read,
loadConfig,
HTML_EXTENSIONS,
EXCLUDED_DIRS,
-};
+};
\ No newline at end of file
From 9db738d4232e8e7842e8d0b7921253e86d876bdf Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 13:47:33 +0200
Subject: [PATCH 02/17] feat: extend URL extraction to support unquoted
attributes
Enhanced link-checking functionality to detect URLs in unquoted attributes such as `href=https://example.com`. Updated regex, tests, and documentation to reflect this improvement.
(This commit message was AI-generated.)
Signed-off-by: Jens Oliver Meiert
---
CHANGELOG.md | 8 +++---
README.md | 2 +-
bin/hihtml.test.js | 54 +++++++++++++++++++++++++++++++++++++
src/adapters/check-links.js | 4 +--
4 files changed, 62 insertions(+), 6 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 11e0f54..bce2d6f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,9 +8,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Added
-* Added `checkCodeString(content, options?)` to the programmatic API: validates an HTML string and checks it for deprecated markup, mirroring `checkCode` for string-based pipelines
-* Added `checkLinksString(content, options?)`: checks all external http/https URLs found in an HTML string, mirroring `checkLinks` for string-based pipelines
-* Added `minifyString(content, options?)`: minifies an HTML string and returns it, without any file I/O—useful in content-pipeline contexts such as Eleventy transforms, middleware, and SSR handlers
+* Added string-based functions to programmatic API:
+ - `checkCodeString(content, options?)` validates an HTML string and checks it for deprecated markup, mirroring `checkCode` for string-based pipelines
+ - `checkLinksString(content, options?)` checks all external http/https URLs found in an HTML string, mirroring `checkLinks` for string-based pipelines
+ - `minifyString(content, options?)` minifies an HTML string and returns it, without any file I/O—useful in content-pipeline contexts such as Eleventy transforms, middleware, and SSR handlers
+* Extended URL extraction in link checking to also detect URLs in unquoted attributes (e.g., `href=https://example.com`, which is valid HTML)
## [1.2.0-beta] - 2026-05-11
diff --git a/README.md b/README.md
index 7a25518..d79bc8c 100644
--- a/README.md
+++ b/README.md
@@ -119,7 +119,7 @@ const minification = await minify(files, files); // in-place
// String variants—same result types, no file I/O
const minified = await minifyString('Hello world
');
const codeGate = await checkCodeString('Nope
');
-const linksCleaned = await checkLinksString('Example');
+const linksCleaned = await checkLinksString('Example');
```
#### `collect(dir, extensions?, excludedDirs?)`
diff --git a/bin/hihtml.test.js b/bin/hihtml.test.js
index 0be1cb3..44e51bc 100644
--- a/bin/hihtml.test.js
+++ b/bin/hihtml.test.js
@@ -896,6 +896,60 @@ describe('Check links string', () => {
});
});
+// Programmatic API: URL extraction (attributes and quote styles)
+
+describe('URL extraction', () => {
+ const ok = () => `${testServerBase}/ok`;
+ const found = async (html) => {
+ const r = await checkLinksString(html);
+ return { checked: r.countChecked, broken: r.countBroken };
+ };
+
+ test('`href` double-quoted', async () => {
+ assert.deepStrictEqual(await found(`L`), { checked: 1, broken: 0 });
+ });
+
+ test('`href` single-quoted', async () => {
+ assert.deepStrictEqual(await found(`L`), { checked: 1, broken: 0 });
+ });
+
+ test('`href` unquoted', async () => {
+ assert.deepStrictEqual(await found(`L`), { checked: 1, broken: 0 });
+ });
+
+ test('`src` double-quoted', async () => {
+ assert.deepStrictEqual(await found(`
`), { checked: 1, broken: 0 });
+ });
+
+ test('`src` single-quoted', async () => {
+ assert.deepStrictEqual(await found(`
`), { checked: 1, broken: 0 });
+ });
+
+ test('`src` unquoted', async () => {
+ assert.deepStrictEqual(await found(`
`), { checked: 1, broken: 0 });
+ });
+
+ test('`action` double-quoted', async () => {
+ assert.deepStrictEqual(await found(``), { checked: 1, broken: 0 });
+ });
+
+ test('`action` single-quoted', async () => {
+ assert.deepStrictEqual(await found(``), { checked: 1, broken: 0 });
+ });
+
+ test('`action` unquoted', async () => {
+ assert.deepStrictEqual(await found(``), { checked: 1, broken: 0 });
+ });
+
+ test('`srcset` double-quoted', async () => {
+ assert.deepStrictEqual(await found(`
`), { checked: 1, broken: 0 });
+ });
+
+ test('`srcset` single-quoted', async () => {
+ assert.deepStrictEqual(await found(`
`), { checked: 1, broken: 0 });
+ });
+});
+
// Programmatic API: `minify`
describe('Minify files', () => {
diff --git a/src/adapters/check-links.js b/src/adapters/check-links.js
index 048fdea..9819aff 100644
--- a/src/adapters/check-links.js
+++ b/src/adapters/check-links.js
@@ -49,9 +49,9 @@ function extractUrls(content) {
const urls = new Set();
let m;
- const attrRe = /\b(?:href|src|action)=(?:"(https?:\/\/[^"\s>]+)"|'(https?:\/\/[^'\s>]+)')/gi;
+ const attrRe = /\b(?:href|src|action)=(?:"(https?:\/\/[^"\s>]+)"|'(https?:\/\/[^'\s>]+)'|(https?:\/\/[^\s"'`=<>]+))/gi;
while ((m = attrRe.exec(content)) !== null) {
- const rawUrl = m[1] ?? m[2];
+ const rawUrl = m[1] ?? m[2] ?? m[3];
try { urls.add(new URL(rawUrl).href.split('#')[0]); } catch { /* skip malformed URLs */ }
}
From 1cd3e6a7201f72e056a2b54ae37dbd145d39dac5 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 14:03:44 +0200
Subject: [PATCH 03/17] refactor: improve performance across multiple modules
Optimized directory traversal by parallelizing subdirectory handling. Cached HtmlValidate instances per preset, and the HTML Minifier Next import together with its resolved preset options per preset, to reduce redundant initialization. Enhanced link-checker performance with precompiled regexes and efficient ignore-list processing.
(This commit message was AI-generated.)
Signed-off-by: Jens Oliver Meiert
---
CHANGELOG.md | 9 +++++++
src/adapters/check-links.js | 52 ++++++++++++++++++++++++-------------
src/adapters/minify.js | 31 ++++++++++++++--------
src/adapters/validate.js | 31 +++++++++++++++++-----
src/lib/files.js | 4 ++-
5 files changed, 90 insertions(+), 37 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bce2d6f..ba41789 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- `minifyString(content, options?)` minifies an HTML string and returns it, without any file I/O—useful in content-pipeline contexts such as Eleventy transforms, middleware, and SSR handlers
* Extended URL extraction in link checking to also detect URLs in unquoted attributes (e.g., `href=https://example.com`, which is valid HTML)
+### Changed
+
+* Improved performance across several areas:
+ - Directory traversal now fans out subdirectories in parallel (`Promise.all`)
+ - `HtmlValidate` instances are cached per preset, avoiding re-initialization across calls to `validate()`/`checkCode()`
+ - URL-extraction regexes in the link checker are compiled once at module load instead of per-call; extraction now uses `matchAll`
+ - HTML Minifier Next import and preset resolution are cached per preset, avoiding repeated work across calls to `minifyString()`
+ - Ignore-list entries are pre-classified into hostnames (Set) and prefix entries once per `checkLinks()` call, enabling O(1) exact-hostname lookup in the hot path
+
## [1.2.0-beta] - 2026-05-11
### Added
diff --git a/src/adapters/check-links.js b/src/adapters/check-links.js
index 9819aff..f410c43 100644
--- a/src/adapters/check-links.js
+++ b/src/adapters/check-links.js
@@ -12,6 +12,9 @@ export const DEFAULT_LINK_TIMEOUT = 10_000;
const USER_AGENT = `hihtml/${version} link-checker`;
+const RE_ATTR = /\b(?:href|src|action)=(?:"(https?:\/\/[^"\s>]+)"|'(https?:\/\/[^'\s>]+)'|(https?:\/\/[^\s"'`=<>]+))/gi;
+const RE_SRCSET = /\bsrcset=["']([^"']+)["']/gi;
+
/**
* @typedef {Object} LinkResult
* @property {string} url
@@ -47,16 +50,13 @@ const USER_AGENT = `hihtml/${version} link-checker`;
*/
function extractUrls(content) {
const urls = new Set();
- let m;
- const attrRe = /\b(?:href|src|action)=(?:"(https?:\/\/[^"\s>]+)"|'(https?:\/\/[^'\s>]+)'|(https?:\/\/[^\s"'`=<>]+))/gi;
- while ((m = attrRe.exec(content)) !== null) {
+ for (const m of content.matchAll(RE_ATTR)) {
const rawUrl = m[1] ?? m[2] ?? m[3];
try { urls.add(new URL(rawUrl).href.split('#')[0]); } catch { /* skip malformed URLs */ }
}
- const srcsetRe = /\bsrcset=["']([^"']+)["']/gi;
- while ((m = srcsetRe.exec(content)) !== null) {
+ for (const m of content.matchAll(RE_SRCSET)) {
for (const entry of m[1].split(',')) {
const candidate = entry.trim().split(/\s+/)[0];
if (candidate.startsWith('http://') || candidate.startsWith('https://')) {
@@ -164,23 +164,38 @@ async function checkUrl(url, { timeout = DEFAULT_LINK_TIMEOUT, warnOnPermanentRe
}
/**
- * Returns true if the URL matches any entry in the ignore list.
- * Entries without a path component are matched by hostname (exact or subdomain).
- * Entries containing a slash are matched as URL prefixes.
- * @param {string} url
+ * @typedef {{ hostnames: Set, prefixes: string[] }} IgnoreList
+ */
+
+/**
+ * Pre-process an ignore list into hostname entries (Set for O(1) lookup) and prefix entries.
+ * Entries containing a slash are treated as URL prefixes; others as hostnames (exact or subdomain).
* @param {string[]} ignore
+ * @returns {IgnoreList}
+ */
+function buildIgnoreList(ignore) {
+ return {
+ hostnames: new Set(ignore.filter(e => !e.includes('/'))),
+ prefixes: ignore.filter(e => e.includes('/')),
+ };
+}
+
+/**
+ * Returns true if the URL matches any entry in the pre-processed ignore list.
+ * @param {string} url
+ * @param {IgnoreList} ignoreList
* @returns {boolean}
*/
-function isIgnored(url, ignore) {
- if (ignore.length === 0) return false;
+function isIgnored(url, { hostnames, prefixes }) {
+ if (hostnames.size === 0 && prefixes.length === 0) return false;
+ for (const prefix of prefixes) {
+ if (url.startsWith(prefix)) return true;
+ }
let hostname;
try { hostname = new URL(url).hostname; } catch { return false; }
- for (const entry of ignore) {
- if (entry.includes('/')) {
- if (url.startsWith(entry)) return true;
- } else {
- if (hostname === entry || hostname.endsWith(`.${entry}`)) return true;
- }
+ if (hostnames.has(hostname)) return true;
+ for (const h of hostnames) {
+ if (hostname.endsWith(`.${h}`)) return true;
}
return false;
}
@@ -230,10 +245,11 @@ export async function checkLinks(filePaths, {
for (const url of urls) allUrls.add(url);
}
+ const ignoreList = buildIgnoreList(ignore);
const toCheck = new Set();
const toSkip = new Set();
for (const url of allUrls) {
- if (isIgnored(url, ignore)) toSkip.add(url);
+ if (isIgnored(url, ignoreList)) toSkip.add(url);
else toCheck.add(url);
}
diff --git a/src/adapters/minify.js b/src/adapters/minify.js
index d3e5621..8ee704c 100644
--- a/src/adapters/minify.js
+++ b/src/adapters/minify.js
@@ -16,27 +16,36 @@ import { DEFAULT_CONCURRENCY, runWithConcurrency } from '../lib/concurrency.js';
* @property {number} saved
*/
+/** @type {Map }>} */
+const minifierCache = new Map();
+
/**
* Load HTML Minifier Next and resolve preset and extra options into a merged options object.
+ * The import and preset resolution are cached per preset name.
* @param {string} preset
* @param {Record} options
* @returns {Promise<{ htmlMinify: Function, resolvedOptions: Record }>}
*/
async function loadMinifier(preset, options) {
- let htmlMinify, getPreset;
- try {
- ({ minify: htmlMinify, getPreset } = await import('html-minifier-next'));
- } catch {
- throw new Error('Could not load HTML Minifier Next. Ensure it is installed and check for breaking API changes.');
- }
+ if (!minifierCache.has(preset)) {
+ let htmlMinify, getPreset;
+ try {
+ ({ minify: htmlMinify, getPreset } = await import('html-minifier-next'));
+ } catch {
+ throw new Error('Could not load HTML Minifier Next. Ensure it is installed and check for breaking API changes.');
+ }
+
+ let presetOptions;
+ try {
+ presetOptions = /** @type {Record} */ (getPreset(preset) ?? {});
+ } catch (err) {
+ throw new Error(`HTML Minifier Next API error—the package may have breaking changes: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
+ }
- let presetOptions;
- try {
- presetOptions = getPreset(preset) ?? {};
- } catch (err) {
- throw new Error(`HTML Minifier Next API error—the package may have breaking changes: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
+ minifierCache.set(preset, { htmlMinify, presetOptions });
}
+ const { htmlMinify, presetOptions } = /** @type {{ htmlMinify: Function, presetOptions: Record }} */ (minifierCache.get(preset));
return { htmlMinify, resolvedOptions: { ...presetOptions, ...options } };
}
diff --git a/src/adapters/validate.js b/src/adapters/validate.js
index eb1bf7e..efdda83 100644
--- a/src/adapters/validate.js
+++ b/src/adapters/validate.js
@@ -25,14 +25,17 @@ import { DEFAULT_CONCURRENCY, runWithConcurrency } from '../lib/concurrency.js';
* @property {number} countIgnored
*/
+/** @type {Map} */
+const validatorCache = new Map();
+
/**
- * Validate HTML files using HTML-validate.
- * @param {string[]} filePaths
- * @param {{ preset?: string, ignore?: string[], concurrency?: number, contents?: Map, onProgress?: () => void }} [options]
- * @returns {Promise}
+ * Return a cached HtmlValidate instance for the given preset, creating one if needed.
+ * @param {string} preset
+ * @returns {Promise}
*/
-export async function validate(filePaths, { preset = 'standard', ignore = [], concurrency = DEFAULT_CONCURRENCY, contents, onProgress } = {}) {
- const ignoreSet = new Set(Array.isArray(ignore) ? ignore.map(String) : []);
+async function getValidator(preset) {
+ if (validatorCache.has(preset)) return /** @type {import('html-validate').HtmlValidate} */ (validatorCache.get(preset));
+
let HtmlValidate;
try {
({ HtmlValidate } = await import('html-validate'));
@@ -47,6 +50,20 @@ export async function validate(filePaths, { preset = 'standard', ignore = [], co
throw new Error(`HTML-validate initialization failed—the package may have breaking changes: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
}
+ validatorCache.set(preset, validator);
+ return validator;
+}
+
+/**
+ * Validate HTML files using HTML-validate.
+ * @param {string[]} filePaths
+ * @param {{ preset?: string, ignore?: string[], concurrency?: number, contents?: Map, onProgress?: () => void }} [options]
+ * @returns {Promise}
+ */
+export async function validate(filePaths, { preset = 'standard', ignore = [], concurrency = DEFAULT_CONCURRENCY, contents, onProgress } = {}) {
+ const ignoreSet = new Set(Array.isArray(ignore) ? ignore.map(String) : []);
+ const validator = await getValidator(preset);
+
const files = await runWithConcurrency(filePaths, concurrency, async (filePath) => {
let content = contents?.get(filePath);
@@ -89,4 +106,4 @@ export async function validate(filePaths, { preset = 'standard', ignore = [], co
const countIgnored = files.reduce((acc, f) => acc + f.messages.filter(m => m.ignored).length, 0);
return { files, countErrors, countWarnings, countIgnored };
-}
+}
\ No newline at end of file
diff --git a/src/lib/files.js b/src/lib/files.js
index 3256eb5..257ad62 100644
--- a/src/lib/files.js
+++ b/src/lib/files.js
@@ -69,17 +69,19 @@ async function walk(dir, extensions, excludedDirs, results) {
throw err;
}
+ const subdirs = [];
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
if (entry.isSymbolicLink()) continue;
if (entry.isDirectory()) {
if (!excludedDirs.has(entry.name)) {
- await walk(fullPath, extensions, excludedDirs, results);
+ subdirs.push(walk(fullPath, extensions, excludedDirs, results));
}
} else if (entry.isFile()) {
const ext = path.extname(entry.name).slice(1).toLowerCase();
if (extensions.has(ext)) results.push(fullPath);
}
}
+ await Promise.all(subdirs);
}
From d3dbcd58c7f87af8661e987f6d69fa4b92853861 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 14:14:45 +0200
Subject: [PATCH 04/17] refactor: cache promises for HtmlValidate instances
Updated validator caching to store promises, ensuring concurrent callers share a single initialization and preventing redundant instantiation. A rejected promise is removed from the cache so that a later call retries initialization instead of receiving the cached failure.
(This commit message was AI-generated.)
Signed-off-by: Jens Oliver Meiert
---
src/adapters/validate.js | 49 +++++++++++++++++++++++-----------------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/src/adapters/validate.js b/src/adapters/validate.js
index efdda83..c3eaf0c 100644
--- a/src/adapters/validate.js
+++ b/src/adapters/validate.js
@@ -25,33 +25,40 @@ import { DEFAULT_CONCURRENCY, runWithConcurrency } from '../lib/concurrency.js';
* @property {number} countIgnored
*/
-/** @type {Map} */
+/** @type {Map>} */
const validatorCache = new Map();
/**
- * Return a cached HtmlValidate instance for the given preset, creating one if needed.
+ * Return a shared promise for a cached HtmlValidate instance for the given preset.
+ * Caching the promise rather than the resolved value means concurrent callers
+ * share a single initialization rather than each racing past the cache check.
* @param {string} preset
* @returns {Promise}
*/
-async function getValidator(preset) {
- if (validatorCache.has(preset)) return /** @type {import('html-validate').HtmlValidate} */ (validatorCache.get(preset));
-
- let HtmlValidate;
- try {
- ({ HtmlValidate } = await import('html-validate'));
- } catch {
- throw new Error('Could not load HTML-validate. Ensure it is installed and check for breaking API changes.');
- }
-
- let validator;
- try {
- validator = new HtmlValidate({ extends: [`html-validate:${preset}`] });
- } catch (err) {
- throw new Error(`HTML-validate initialization failed—the package may have breaking changes: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
- }
-
- validatorCache.set(preset, validator);
- return validator;
+function getValidator(preset) {
+ if (validatorCache.has(preset)) return /** @type {Promise} */ (validatorCache.get(preset));
+
+ const promise = (async () => {
+ let HtmlValidate;
+ try {
+ ({ HtmlValidate } = await import('html-validate'));
+ } catch {
+ throw new Error('Could not load HTML-validate. Ensure it is installed and check for breaking API changes.');
+ }
+
+ let validator;
+ try {
+ validator = new HtmlValidate({ extends: [`html-validate:${preset}`] });
+ } catch (err) {
+ throw new Error(`HTML-validate initialization failed—the package may have breaking changes: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
+ }
+
+ return validator;
+ })();
+
+ promise.catch(() => validatorCache.delete(preset));
+ validatorCache.set(preset, promise);
+ return promise;
}
/**
From 894bcccc8a7d64d7ab3784cd8f31af4ac3f64c09 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 14:18:40 +0200
Subject: [PATCH 05/17] fix: handle mismatched capture groups in srcset regex
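Updated the `RE_SRCSET` regex to match double-quoted and single-quoted values as separate alternatives, each with its own capture group, so that the opening and closing quotes must match and a value may contain the other quote character. Adjusted the parsing logic to read whichever capture group matched.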
(This commit message was AI-generated.)
Signed-off-by: Jens Oliver Meiert
---
src/adapters/check-links.js | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/adapters/check-links.js b/src/adapters/check-links.js
index f410c43..c619587 100644
--- a/src/adapters/check-links.js
+++ b/src/adapters/check-links.js
@@ -13,7 +13,7 @@ export const DEFAULT_LINK_TIMEOUT = 10_000;
const USER_AGENT = `hihtml/${version} link-checker`;
const RE_ATTR = /\b(?:href|src|action)=(?:"(https?:\/\/[^"\s>]+)"|'(https?:\/\/[^'\s>]+)'|(https?:\/\/[^\s"'`=<>]+))/gi;
-const RE_SRCSET = /\bsrcset=["']([^"']+)["']/gi;
+const RE_SRCSET = /\bsrcset=(?:"([^"]+)"|'([^']+)')/gi;
/**
* @typedef {Object} LinkResult
@@ -57,7 +57,7 @@ function extractUrls(content) {
}
for (const m of content.matchAll(RE_SRCSET)) {
- for (const entry of m[1].split(',')) {
+ for (const entry of (m[1] ?? m[2]).split(',')) {
const candidate = entry.trim().split(/\s+/)[0];
if (candidate.startsWith('http://') || candidate.startsWith('https://')) {
try { urls.add(new URL(candidate).href.split('#')[0]); } catch { /* skip malformed URLs */ }
From 7c6dc715f2231f0f6fdf971f2b535547cb63c38b Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 14:19:32 +0200
Subject: [PATCH 06/17] refactor: add comment clarifying validator cache limits
Added a comment explaining the unbounded nature of `validatorCache`, noting that it will only contain a small, fixed number of entries due to the limited set of presets exposed by `html-validate`. This enhances code readability and maintainability.
(This commit message was AI-generated.)
Signed-off-by: Jens Oliver Meiert
---
src/adapters/validate.js | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/adapters/validate.js b/src/adapters/validate.js
index c3eaf0c..141b315 100644
--- a/src/adapters/validate.js
+++ b/src/adapters/validate.js
@@ -25,6 +25,8 @@ import { DEFAULT_CONCURRENCY, runWithConcurrency } from '../lib/concurrency.js';
* @property {number} countIgnored
*/
+// Intentionally unbounded: Keyed by preset name, and HTML-validate exposes a
+// fixed small set of presets, so this will never hold more than a handful of entries
/** @type {Map>} */
const validatorCache = new Map();
From fcdff089beb378ff1da474cbf621e991db8e5b56 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 14:20:27 +0200
Subject: [PATCH 07/17] docs: add/reorder options
Signed-off-by: Jens Oliver Meiert
---
README.md | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index d79bc8c..82d697e 100644
--- a/README.md
+++ b/README.md
@@ -153,6 +153,8 @@ Checks all external http/https URLs (`href`, `src`, `srcset`, `action` attribute
* `options.concurrency`: Maximum concurrent requests (default: `8`)
* `options.warnOnPermanentRedirects`: Warn on 301/308 permanent redirects (default: `false`)
* `options.ignore`: List of hostnames or URL prefixes to skip (default: `[]`)
+* `options.onStart`: Called once with the total number of URLs to check
+* `options.onProgress`: Called after each URL is checked
Links are checked via HEAD request, falling back to GET on 405. 4xx and 5xx responses are reported as broken. Skipped URLs (from the ignore list) appear in results with `skipped: true` and are never counted as broken.
@@ -164,8 +166,8 @@ Checks all external http/https URLs found in an HTML string. Returns `Promise
Date: Wed, 13 May 2026 14:23:52 +0200
Subject: [PATCH 08/17] docs: add tags
Signed-off-by: Jens Oliver Meiert
---
package.json | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/package.json b/package.json
index 1a8c4c4..2df0313 100644
--- a/package.json
+++ b/package.json
@@ -26,13 +26,19 @@
"funding": "https://github.com/j9t/hihtml?sponsor=1",
"homepage": "https://github.com/j9t/hihtml",
"keywords": [
+ "conformance",
"html",
- "minify",
+ "html-minifier",
+ "html-minifier-next",
+ "html-validate",
+ "link-check",
+ "links",
"minification",
+ "minifier",
+ "minify",
+ "quality",
"validate",
- "validation",
- "qa",
- "qc"
+ "validation"
],
"license": "MIT",
"name": "hihtml",
From ed56b7bbb6b4a7594ae920fbedc836c5458e02c5 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 14:33:16 +0200
Subject: [PATCH 09/17] fix: handle spaces around `=` for attributes in regex
Updated the `RE_ATTR` and `RE_SRCSET` regexes to allow spaces around the `=` character for attributes like `href`, `src`, and `srcset`. Added corresponding test cases to ensure proper functionality and accuracy.
(This commit message was AI-generated.)
Signed-off-by: Jens Oliver Meiert
---
bin/hihtml.test.js | 8 ++++++++
src/adapters/check-links.js | 4 ++--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/bin/hihtml.test.js b/bin/hihtml.test.js
index 44e51bc..710c230 100644
--- a/bin/hihtml.test.js
+++ b/bin/hihtml.test.js
@@ -948,6 +948,14 @@ describe('URL extraction', () => {
test('`srcset` single-quoted', async () => {
assert.deepStrictEqual(await found(`
`), { checked: 1, broken: 0 });
});
+
+ test('`href` with spaces around `=`', async () => {
+ assert.deepStrictEqual(await found(`link`), { checked: 1, broken: 0 });
+ });
+
+ test('`srcset` with spaces around `=`', async () => {
+ assert.deepStrictEqual(await found(`
`), { checked: 1, broken: 0 });
+ });
});
// Programmatic API: `minify`
diff --git a/src/adapters/check-links.js b/src/adapters/check-links.js
index c619587..5a7421d 100644
--- a/src/adapters/check-links.js
+++ b/src/adapters/check-links.js
@@ -12,8 +12,8 @@ export const DEFAULT_LINK_TIMEOUT = 10_000;
const USER_AGENT = `hihtml/${version} link-checker`;
-const RE_ATTR = /\b(?:href|src|action)=(?:"(https?:\/\/[^"\s>]+)"|'(https?:\/\/[^'\s>]+)'|(https?:\/\/[^\s"'`=<>]+))/gi;
-const RE_SRCSET = /\bsrcset=(?:"([^"]+)"|'([^']+)')/gi;
+const RE_ATTR = /\b(?:href|src|action)\s*=\s*(?:"(https?:\/\/[^"\s>]+)"|'(https?:\/\/[^'\s>]+)'|(https?:\/\/[^\s"'`=<>]+))/gi;
+const RE_SRCSET = /\bsrcset\s*=\s*(?:"([^"]+)"|'([^']+)')/gi;
/**
* @typedef {Object} LinkResult
From 4eebbdd64e6f9ee85ccede9e3b98983ee5c84a8a Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 14:53:42 +0200
Subject: [PATCH 10/17] fix: skip ignored and irrelevant URLs during link
checks
Updated URL extraction logic to disregard URLs inside HTML comments, ``), { checked: 0, broken: 0 });
+ });
+
+ test('Still checks ``), { checked: 1, broken: 0 });
+ });
+
+ test('Does not check URLs inside ``), { checked: 0, broken: 0 });
+ });
});
// Programmatic API: `minify`
diff --git a/src/adapters/check-links.js b/src/adapters/check-links.js
index 5a7421d..ee31e66 100644
--- a/src/adapters/check-links.js
+++ b/src/adapters/check-links.js
@@ -51,12 +51,17 @@ const RE_SRCSET = /\bsrcset\s*=\s*(?:"([^"]+)"|'([^']+)')/gi;
function extractUrls(content) {
const urls = new Set();
- for (const m of content.matchAll(RE_ATTR)) {
+ const stripped = content
+ .replace(//g, '')
+ .replace(/(')
+ .replace(/(');
+
+ for (const m of stripped.matchAll(RE_ATTR)) {
const rawUrl = m[1] ?? m[2] ?? m[3];
try { urls.add(new URL(rawUrl).href.split('#')[0]); } catch { /* skip malformed URLs */ }
}
- for (const m of content.matchAll(RE_SRCSET)) {
+ for (const m of stripped.matchAll(RE_SRCSET)) {
for (const entry of (m[1] ?? m[2]).split(',')) {
const candidate = entry.trim().split(/\s+/)[0];
if (candidate.startsWith('http://') || candidate.startsWith('https://')) {
@@ -174,9 +179,10 @@ async function checkUrl(url, { timeout = DEFAULT_LINK_TIMEOUT, warnOnPermanentRe
* @returns {IgnoreList}
*/
function buildIgnoreList(ignore) {
+ const normalized = ignore.map(e => e.trim().toLowerCase());
return {
- hostnames: new Set(ignore.filter(e => !e.includes('/'))),
- prefixes: ignore.filter(e => e.includes('/')),
+ hostnames: new Set(normalized.filter(e => !e.includes('/'))),
+ prefixes: normalized.filter(e => e.includes('/')).map(e => e.replace(/\/+$/, '')),
};
}
@@ -188,8 +194,9 @@ function buildIgnoreList(ignore) {
*/
function isIgnored(url, { hostnames, prefixes }) {
if (hostnames.size === 0 && prefixes.length === 0) return false;
+ const urlLower = url.toLowerCase();
for (const prefix of prefixes) {
- if (url.startsWith(prefix)) return true;
+ if (urlLower.startsWith(prefix)) return true;
}
let hostname;
try { hostname = new URL(url).hostname; } catch { return false; }
From 4611e5a42aa4a1b6a0ade9e5d8e133bca67b1907 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 17:53:03 +0200
Subject: [PATCH 11/17] refactor: rename constants
Signed-off-by: Jens Oliver Meiert
---
bin/hihtml.test.js | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/bin/hihtml.test.js b/bin/hihtml.test.js
index cb89de8..78ac49c 100644
--- a/bin/hihtml.test.js
+++ b/bin/hihtml.test.js
@@ -33,9 +33,9 @@ function run(args, stdinInput = '', cwd = undefined) {
// Fixtures
-const CLEAN_HTML = 'TestHello.
';
-const DEPRECATED_HTML = 'TestOld';
-const INVALID_HTML = 'TestBad nesting.
';
+const HTML_CLEAN = 'TestHello.
';
+const HTML_DEPRECATED = 'TestOld';
+const HTML_INVALID = 'TestBad nesting.
';
/** @type {http.Server} */
let testServer;
@@ -46,9 +46,9 @@ let testServerBase;
before(async () => {
fs.mkdirSync(tempDir, { recursive: true });
- fs.writeFileSync(path.join(tempDir, 'clean.html'), CLEAN_HTML);
- fs.writeFileSync(path.join(tempDir, 'deprecated.html'), DEPRECATED_HTML);
- fs.writeFileSync(path.join(tempDir, 'invalid.html'), INVALID_HTML);
+ fs.writeFileSync(path.join(tempDir, 'clean.html'), HTML_CLEAN);
+ fs.writeFileSync(path.join(tempDir, 'deprecated.html'), HTML_DEPRECATED);
+ fs.writeFileSync(path.join(tempDir, 'invalid.html'), HTML_INVALID);
testServer = await new Promise(resolve => {
const server = http.createServer((req, res) => {
@@ -686,7 +686,7 @@ describe('Check code', () => {
describe('Check code string', () => {
test('Returns expected result shape', async () => {
- const result = await checkCodeString(CLEAN_HTML);
+ const result = await checkCodeString(HTML_CLEAN);
assert.ok('validation' in result);
assert.ok('deprecation' in result);
assert.ok('countErrors' in result.validation);
@@ -694,26 +694,26 @@ describe('Check code string', () => {
});
test('Clean HTML reports no issues', async () => {
- const result = await checkCodeString(CLEAN_HTML);
+ const result = await checkCodeString(HTML_CLEAN);
assert.strictEqual(result.validation.countErrors, 0);
assert.strictEqual(result.deprecation.countIssues, 0);
});
test('Detects deprecated markup', async () => {
- const result = await checkCodeString(DEPRECATED_HTML);
+ const result = await checkCodeString(HTML_DEPRECATED);
assert.ok(result.deprecation.countIssues > 0);
assert.ok(result.deprecation.files[0].elements.includes('center'));
});
test('Detects validation errors', async () => {
- const result = await checkCodeString(INVALID_HTML);
+ const result = await checkCodeString(HTML_INVALID);
assert.ok(result.validation.countErrors > 0);
});
test('Passes ignore list through to validation result', async () => {
- const base = await checkCodeString(INVALID_HTML);
+ const base = await checkCodeString(HTML_INVALID);
const ruleIds = [...new Set(base.validation.files[0].messages.map(m => m.ruleId))];
- const result = await checkCodeString(INVALID_HTML, { ignore: ruleIds });
+ const result = await checkCodeString(HTML_INVALID, { ignore: ruleIds });
assert.strictEqual(result.validation.countErrors, 0);
assert.strictEqual(result.validation.countIgnored, base.validation.files[0].messages.length);
});
@@ -889,7 +889,7 @@ describe('Check links string', () => {
});
test('No http/https links returns empty result', async () => {
- const result = await checkLinksString(CLEAN_HTML);
+ const result = await checkLinksString(HTML_CLEAN);
assert.strictEqual(result.countBroken, 0);
assert.strictEqual(result.countChecked, 0);
assert.strictEqual(result.files[0].links.length, 0);
@@ -1053,13 +1053,13 @@ describe('Minify files', () => {
describe('Minify string', () => {
test('Returns a string', async () => {
- const result = await minifyString(CLEAN_HTML);
+ const result = await minifyString(HTML_CLEAN);
assert.strictEqual(typeof result, 'string');
});
test('Output is not larger than input', async () => {
- const result = await minifyString(CLEAN_HTML);
- assert.ok(Buffer.byteLength(result) <= Buffer.byteLength(CLEAN_HTML));
+ const result = await minifyString(HTML_CLEAN);
+ assert.ok(Buffer.byteLength(result) <= Buffer.byteLength(HTML_CLEAN));
});
test('Collapses whitespace with default preset', async () => {
From 2cb2e90a053b2326980c840f17185abe6ab1e98b Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 17:55:49 +0200
Subject: [PATCH 12/17] chore: change test text language
Signed-off-by: Jens Oliver Meiert
---
bin/hihtml.test.js | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/bin/hihtml.test.js b/bin/hihtml.test.js
index 78ac49c..d03ec90 100644
--- a/bin/hihtml.test.js
+++ b/bin/hihtml.test.js
@@ -33,9 +33,9 @@ function run(args, stdinInput = '', cwd = undefined) {
// Fixtures
-const HTML_CLEAN = 'TestHello.
';
-const HTML_DEPRECATED = 'TestOld';
-const HTML_INVALID = 'TestBad nesting.
';
+const HTML_CLEAN = 'TestYes
';
+const HTML_DEPRECATED = 'TestNot anymore';
+const HTML_INVALID = 'TestNo
';
/** @type {http.Server} */
let testServer;
@@ -1083,7 +1083,7 @@ describe('Read files', () => {
const result = await read([fileClean]);
assert.ok(result instanceof Map);
assert.ok(result.has(fileClean));
- assert.ok(result.get(fileClean).includes('Hello'));
+ assert.ok(result.get(fileClean).includes('Yes'));
});
test('Skips unreadable files gracefully', async () => {
From e645cf85c60ba8b01039322af49535396f112338 Mon Sep 17 00:00:00 2001
From: Jens Oliver Meiert
Date: Wed, 13 May 2026 18:04:39 +0200
Subject: [PATCH 13/17] chore: adjust hihtml capitalization
Signed-off-by: Jens Oliver Meiert
---
CHANGELOG.md | 2 +-
README.md | 16 ++++++++--------
SECURITY.md | 2 +-
src/index.d.ts | 4 ++--
src/index.types.ts | 4 ++--
src/lib/config.js | 4 ++--
6 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba41789..d82ac07 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
# Changelog
-All notable changes to HiHTML are documented in this file, which is (mostly) AI-generated and (always) human-edited. Dependency updates may or may not be called out specifically.
+All notable changes to hihtml are documented in this file, which is (mostly) AI-generated and (always) human-edited. Dependency updates may or may not be called out specifically.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
diff --git a/README.md b/README.md
index 82d697e..2860156 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
-# HiHTML, the HTML Processing Supertool (Beta)
+# hihtml, the HTML Processing Supertool (Beta)
[](https://www.npmjs.com/package/hihtml) [](https://github.com/j9t/hihtml/actions) [](https://socket.dev/npm/package/hihtml) [](https://github.com/j9t/hihtml?sponsor=1)
-HiHTML—“High Quality HTML”—bundles key HTML tools into one, making HTML validation and semantics control, link checking, and minification as easy as it gets: [HTML-validate](https://html-validate.org/) for validation, [ObsoHTML](https://github.com/j9t/obsohtml) for deprecated markup detection, Node’s built-in `http`/`https` for link checking, and [HTML Minifier Next](https://github.com/j9t/html-minifier-next) for minification. HiHTML provides a CLI and a programmatic API, and comes with strong defaults but is still highly configurable.
+hihtml—“high-quality HTML”—bundles several key HTML tools into one, making HTML validation and semantics control, link checking, and minification as easy as it gets: [HTML-validate](https://html-validate.org/) for validation, [ObsoHTML](https://github.com/j9t/obsohtml) for deprecated markup detection, Node’s built-in `http`/`https` for link checking, and [HTML Minifier Next](https://github.com/j9t/html-minifier-next) for minification. hihtml provides a CLI and a programmatic API, and comes with strong defaults but is still highly configurable.
## Usage
@@ -14,11 +14,11 @@ HiHTML—“High Quality HTML”—bundles key HTML tools into one, making HTML
npm i hihtml
```
-Recommended: Just run HiHTML via `npx hihtml`.
+Recommended: Just run hihtml via `npx hihtml`.
#### Execution
-Without options, HiHTML validates HTML files and checks for deprecated markup in the current directory. Use flags to control behavior:
+Without options, hihtml validates HTML files and checks for deprecated markup in the current directory. Use flags to control behavior:
| Flag | Description |
|---|---|
@@ -188,11 +188,11 @@ Minifies an HTML string using HTML Minifier Next. Returns `Promise`. Use
#### `loadConfig(cwd?, filePath?)`
-Loads HiHTML configuration. When `filePath` is given, only that file is read (no CWD fallback); if it contains a `"hihtml"` key that value is used, otherwise the root object is used. Without `filePath`, reads `.hihtml.json` or the `"hihtml"` key in `package.json` from `cwd`. Returns `Promise`.
+Loads hihtml configuration. When `filePath` is given, only that file is read (no CWD fallback); if it contains a `"hihtml"` key that value is used, otherwise the root object is used. Without `filePath`, reads `.hihtml.json` or the `"hihtml"` key in `package.json` from `cwd`. Returns `Promise`.
## Configuration
-Create a .hihtml.json file in your project root, or add a `"hihtml"` key to package.json. Both use the same format (here showing HiHTML’s defaults):
+Create a .hihtml.json file in your project root, or add a `"hihtml"` key to package.json. Both use the same format (here showing hihtml’s defaults):
```json
{
@@ -236,12 +236,12 @@ If in doubt or in a hurry, [report issues here](https://github.com/j9t/hihtml/is
### What does ObsoHTML do here when HTML-validate already reports on deprecated markup?
-At the moment, ObsoHTML catches some elements and attributes that HTML-validate doesn’t. Once HTML-validate covers everything ObsoHTML covers, ObsoHTML is going to be removed from HiHTML. Note that ObsoHTML is purely informational—it doesn’t prevent minification when used with the `--all`/`-a` flag.
+At the moment, ObsoHTML catches some elements and attributes that HTML-validate doesn’t. Once HTML-validate covers everything ObsoHTML covers, ObsoHTML is going to be removed from hihtml. Note that ObsoHTML is purely informational—it doesn’t prevent minification when used with the `--all`/`-a` flag.
***
You might like some of my other work:
-* Optimization tools: HiHTML (including [HTML Minifier Next](https://github.com/j9t/html-minifier-next) + [ObsoHTML](https://github.com/j9t/obsohtml)) · [Image Guard](https://github.com/j9t/image-guard) · [Compressor.js Next](https://github.com/j9t/compressorjs-next) · [.htaccess Punk](https://github.com/j9t/htaccess-punk)
+* Optimization tools: hihtml (including [HTML Minifier Next](https://github.com/j9t/html-minifier-next) + [ObsoHTML](https://github.com/j9t/obsohtml)) · [Image Guard](https://github.com/j9t/image-guard) · [Compressor.js Next](https://github.com/j9t/compressorjs-next) · [.htaccess Punk](https://github.com/j9t/htaccess-punk)
* Defense tools: [IA Defensa](https://iadefensa.com/solutions/)
* Resources for quality web development: [Articles](https://meiert.com/topics/development/) · [Books](https://meiert.com/topics/books/) (including [_On Web Development_](https://meiert.com/blog/on-web-development-2/)) · [News](https://frontenddogma.com/) · [Terminology](https://webglossary.info/)
\ No newline at end of file
diff --git a/SECURITY.md b/SECURITY.md
index e5bfae8..c95f375 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -2,7 +2,7 @@
## Supported Versions
-Only the latest and therefore current version of HiHTML is supported. It’s advised to update older versions to the latest version.
+Only the latest and therefore current version of hihtml is supported. It’s advised to update older versions to the latest version.
## Reporting a Vulnerability
diff --git a/src/index.d.ts b/src/index.d.ts
index 056e5cc..0a6fdc1 100644
--- a/src/index.d.ts
+++ b/src/index.d.ts
@@ -76,7 +76,7 @@ export interface MinificationResult {
saved: number;
}
-export interface HiHTMLConfig {
+export interface HihtmlConfig {
extensions?: string[];
ignore?: string[];
validation?: { preset?: string; ignore?: string[] };
@@ -100,7 +100,7 @@ export declare function read(
options?: { concurrency?: number; onProgress?: () => void }
): Promise