From 9d81fafa8c4783f3026057cbbf096722a42c841a Mon Sep 17 00:00:00 2001 From: calixteman Date: Sun, 8 Mar 2026 15:47:42 +0100 Subject: [PATCH] Add a new internal viewer to explore the structure of PDF files. The one from pdf.js.utils is a bit too old: a lot of bugs have been fixed in the code that parses PDF files since then. It's just an internal development tool, so it doesn't need to be perfect, but it should be good enough to be useful. --- gulpfile.mjs | 55 +++ src/core/document.js | 241 ++++++++++- src/core/worker.js | 16 + src/display/api.js | 8 + web/pdf_internal_viewer.css | 304 +++++++++++++ web/pdf_internal_viewer.html | 82 ++++ web/pdf_internal_viewer.js | 813 +++++++++++++++++++++++++++++++++++ 7 files changed, 1508 insertions(+), 11 deletions(-) create mode 100644 web/pdf_internal_viewer.css create mode 100644 web/pdf_internal_viewer.html create mode 100644 web/pdf_internal_viewer.js diff --git a/gulpfile.mjs b/gulpfile.mjs index 4dd951834bd71..fb67966b184a2 100644 --- a/gulpfile.mjs +++ b/gulpfile.mjs @@ -65,6 +65,7 @@ const IMAGE_DECODERS_LEGACY_DIR = BUILD_DIR + "image_decoders-legacy/"; const DEFAULT_PREFERENCES_DIR = BUILD_DIR + "default_preferences/"; const MINIFIED_DIR = BUILD_DIR + "minified/"; const MINIFIED_LEGACY_DIR = BUILD_DIR + "minified-legacy/"; +const INTERNAL_VIEWER_DIR = BUILD_DIR + "internal-viewer/"; const JSDOC_BUILD_DIR = BUILD_DIR + "jsdoc/"; const GH_PAGES_DIR = BUILD_DIR + "gh-pages/"; const DIST_DIR = BUILD_DIR + "dist/"; @@ -2368,6 +2369,52 @@ gulp.task("check_l10n", function (done) { }); }); +function createInternalViewerBundle(defines) { + const viewerFileConfig = createWebpackConfig(defines, { + filename: "pdf_internal_viewer.mjs", + library: { + type: "module", + }, + }); + return gulp + .src("./web/pdf_internal_viewer.js", { encoding: false }) + .pipe(webpack2Stream(viewerFileConfig)); +} + +function buildInternalViewer(defines, dir) { + fs.rmSync(dir, { recursive: true, force: true }); + + return ordered([ + 
createMainBundle(defines).pipe(gulp.dest(dir + "build")), + createWorkerBundle(defines).pipe(gulp.dest(dir + "build")), + createInternalViewerBundle(defines).pipe(gulp.dest(dir + "web")), + preprocessHTML("web/pdf_internal_viewer.html", defines).pipe( + gulp.dest(dir + "web") + ), + preprocessCSS("web/pdf_internal_viewer.css", defines) + .pipe( + postcss([ + postcssDirPseudoClass(), + discardCommentsCSS(), + postcssNesting(), + postcssLightDarkFunction({ preserve: true }), + autoprefixer(AUTOPREFIXER_CONFIG), + ]) + ) + .pipe(gulp.dest(dir + "web")), + createWasmBundle().pipe(gulp.dest(dir + "web/wasm")), + ]); +} + +gulp.task( + "internal-viewer", + gulp.series(createBuildNumber, function createInternalViewer() { + console.log("\n### Creating internal viewer"); + const defines = { ...DEFINES, GENERIC: true }; + return buildInternalViewer(defines, INTERNAL_VIEWER_DIR); + }) +); + function ghPagesPrepare() { console.log("\n### Creating web site"); @@ -2391,6 +2438,13 @@ function ghPagesPrepare() { gulp .src(JSDOC_BUILD_DIR + "**/*", { base: JSDOC_BUILD_DIR, encoding: false }) .pipe(gulp.dest(GH_PAGES_DIR + "api/draft/")), + gulp + .src(INTERNAL_VIEWER_DIR + "**/*", { + base: INTERNAL_VIEWER_DIR, + encoding: false, + removeBOM: false, + }) + .pipe(gulp.dest(GH_PAGES_DIR + "internal-viewer/")), ]); } @@ -2442,6 +2496,7 @@ gulp.task( gulp.series( "generic", "generic-legacy", + "internal-viewer", "jsdoc", ghPagesPrepare, "metalsmith" diff --git a/src/core/document.js b/src/core/document.js index a4d447f598efb..f0a291e9548d8 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -22,6 +22,7 @@ import { isArrayEqual, makeArr, objectSize, + OPS, PageActionEventType, RenderingIntentFlag, shadow, @@ -37,6 +38,17 @@ import { PopupAnnotation, WidgetAnnotation, } from "./annotation.js"; +import { + Cmd, + Dict, + EOF, + isName, + isRefsEqual, + Name, + Ref, + RefSet, + RefSetCache, +} from "./primitives.js"; import { collectActions, getInheritableProperty, @@ -51,16 
+63,9 @@ import { XRefEntryException, XRefParseException, } from "./core_utils.js"; -import { - Dict, - isName, - isRefsEqual, - Name, - Ref, - RefSet, - RefSetCache, -} from "./primitives.js"; +import { EvaluatorPreprocessor, PartialEvaluator } from "./evaluator.js"; import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js"; +import { Lexer, Linearization, Parser } from "./parser.js"; import { NullStream, Stream } from "./stream.js"; import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./calculate_md5.js"; @@ -68,10 +73,11 @@ import { Catalog } from "./catalog.js"; import { clearGlobalCaches } from "./cleanup_helper.js"; import { DatasetReader } from "./dataset_reader.js"; import { Intersector } from "./intersector.js"; -import { Linearization } from "./parser.js"; +import { LocalColorSpaceCache } from "./image_utils.js"; import { ObjectLoader } from "./object_loader.js"; import { OperatorList } from "./operator_list.js"; -import { PartialEvaluator } from "./evaluator.js"; +import { PDFFunctionFactory } from "./function.js"; +import { PDFImage } from "./image.js"; import { StreamsSequenceStream } from "./decode_stream.js"; import { StructTreePage } from "./struct_tree.js"; import { XFAFactory } from "./xfa/factory.js"; @@ -2030,6 +2036,219 @@ class PDFDocument { AnnotationFactory.createGlobals(this.pdfManager) ); } + + async toJSObject(value, firstCall = true) { + if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { + throw new Error("Not implemented: toJSObject"); + } + + if (value === null && firstCall) { + return this.toJSObject(this.xref.trailer, false); + } + if (value instanceof Dict) { + const obj = Object.create(null); + const isPage = isName(value.get("Type"), "Page"); + for (const [key, val] of value.getRawEntries()) { + obj[key] = + isPage && key === "Contents" + ? 
_getContentTokens(val, this.xref) + : await this.toJSObject(val, false); + } + return obj; + } + if (Array.isArray(value)) { + return Promise.all(value.map(v => this.toJSObject(v, false))); + } + if (value instanceof Ref) { + if (firstCall) { + return this.toJSObject(this.xref.fetch(value), false); + } + const result = Object.create(null); + result.num = value.num; + result.gen = value.gen; + return result; + } + if (value instanceof BaseStream) { + const { dict } = value; + const obj = Object.create(null); + obj.dict = await this.toJSObject(dict, false); + + if ( + isName(dict.get("Type"), "XObject") && + isName(dict.get("Subtype"), "Image") + ) { + try { + const pdfFunctionFactory = new PDFFunctionFactory({ + xref: this.xref, + isEvalSupported: this.pdfManager.evaluatorOptions.isEvalSupported, + }); + const imageObj = await PDFImage.buildImage({ + xref: this.xref, + res: Dict.empty, + image: value, + pdfFunctionFactory, + globalColorSpaceCache: this.catalog.globalColorSpaceCache, + localColorSpaceCache: new LocalColorSpaceCache(), + }); + const imgData = await imageObj.createImageData( + /* forceRGBA = */ true, + /* isOffscreenCanvasSupported = */ false + ); + obj.imageData = { + width: imgData.width, + height: imgData.height, + kind: imgData.kind, + data: imgData.data, + }; + return obj; + } catch { + // Fall through to regular byte stream if image decoding fails. 
+ } + } + + if (isName(dict.get("Subtype"), "Form")) { + obj.bytes = value.getString(); + value.reset(); + const { instructions, cmdNames } = _groupIntoInstructions( + _tokenizeStream(value, this.xref) + ); + obj.contentStream = true; + obj.instructions = instructions; + obj.cmdNames = cmdNames; + return obj; + } + + obj.bytes = value.getString(); + return obj; + } + return value; + } +} + +function _tokenizeStream(stream, xref) { + const tokens = []; + const parser = new Parser({ + lexer: new Lexer(stream), + xref, + allowStreams: false, + }); + while (true) { + let obj; + try { + obj = parser.getObj(); + } catch { + break; + } + if (obj === EOF) { + break; + } + const token = _tokenToJSObject(obj); + if (token !== null) { + tokens.push(token); + } + } + return tokens; +} + +function _getContentTokens(contentsVal, xref) { + const refs = Array.isArray(contentsVal) ? contentsVal : [contentsVal]; + const rawContents = []; + const tokens = []; + for (const rawRef of refs) { + if (rawRef instanceof Ref) { + rawContents.push({ num: rawRef.num, gen: rawRef.gen }); + } + const stream = xref.fetchIfRef(rawRef); + if (!(stream instanceof BaseStream)) { + continue; + } + tokens.push(..._tokenizeStream(stream, xref)); + } + const { instructions, cmdNames } = _groupIntoInstructions(tokens); + return { contentStream: true, instructions, cmdNames, rawContents }; +} + +// Lazily-built reverse map: OPS numeric id → property name string. 
+let _opsIdToName = null; + +function _getOpsIdToName() { + if (!_opsIdToName) { + _opsIdToName = Object.create(null); + for (const [name, id] of Object.entries(OPS)) { + _opsIdToName[id] = name; + } + } + return _opsIdToName; +} + +function _groupIntoInstructions(tokens) { + const { opMap } = EvaluatorPreprocessor; + const opsIdToName = _getOpsIdToName(); + const instructions = []; + const cmdNames = Object.create(null); + const argBuffer = []; + for (const token of tokens) { + if (token.type !== "cmd") { + argBuffer.push(token); + continue; + } + const op = opMap[token.value]; + if (op && !(token.value in cmdNames)) { + cmdNames[token.value] = opsIdToName[op.id]; + } + let args; + if (!op || op.variableArgs) { + // Unknown command or variable args: consume all pending args. + args = argBuffer.splice(0); + } else { + // Fixed args: consume exactly numArgs, orphan the rest. + const orphanCount = Math.max(0, argBuffer.length - op.numArgs); + for (let i = 0; i < orphanCount; i++) { + instructions.push({ cmd: null, args: [argBuffer.shift()] }); + } + args = argBuffer.splice(0); + } + instructions.push({ cmd: token.value, args }); + } + for (const t of argBuffer) { + instructions.push({ cmd: null, args: [t] }); + } + return { instructions, cmdNames }; +} + +function _tokenToJSObject(obj) { + if (obj instanceof Cmd) { + return { type: "cmd", value: obj.cmd }; + } + if (obj instanceof Name) { + return { type: "name", value: obj.name }; + } + if (obj instanceof Ref) { + return { type: "ref", num: obj.num, gen: obj.gen }; + } + if (Array.isArray(obj)) { + return { type: "array", value: obj.map(_tokenToJSObject) }; + } + if (obj instanceof Dict) { + const result = Object.create(null); + for (const [key, val] of obj.getRawEntries()) { + result[key] = _tokenToJSObject(val); + } + return { type: "dict", value: result }; + } + if (typeof obj === "number") { + return { type: "number", value: obj }; + } + if (typeof obj === "string") { + return { type: "string", value: obj }; + } 
+ if (typeof obj === "boolean") { + return { type: "boolean", value: obj }; + } + if (obj === null) { + return { type: "null" }; + } + return null; } export { Page, PDFDocument }; diff --git a/src/core/worker.js b/src/core/worker.js index 29aa80cf82a68..59e65e9865d39 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -952,6 +952,22 @@ class WorkerMessageHandler { return pdfManager.fontFallback(data.id, handler); }); + handler.on("GetRawData", async function ({ ref, page }) { + if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { + throw new Error("Not implemented: GetRawData"); + } + let value = null; + if (page >= 1) { + value = (await pdfManager.ensureCatalog("getPageDict", [page - 1]))[1]; + } else if (ref) { + value = + typeof ref === "string" + ? Ref.fromString(ref) + : Ref.get(ref.num, ref.gen); + } + return pdfManager.ensureDoc("toJSObject", [value]); + }); + handler.on("Cleanup", function (data) { return pdfManager.cleanup(/* manuallyTriggered = */ true); }); diff --git a/src/display/api.js b/src/display/api.js index f77b572722bce..dfada5fd1f0f2 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -1065,6 +1065,10 @@ class PDFDocumentProxy { return this._transport.downloadInfoCapability.promise; } + getRawData(data) { + return this._transport.getRawData(data); + } + /** * Cleans up resources allocated by the document on both the main and worker * threads. @@ -3173,6 +3177,10 @@ class WorkerTransport { return this.messageHandler.sendWithPromise("GetMarkInfo", null); } + getRawData(data) { + return this.messageHandler.sendWithPromise("GetRawData", data); + } + async startCleanup(keepLoadedFonts = false) { if (this.destroyed) { return; // No need to manually clean-up when destruction has started. 
diff --git a/web/pdf_internal_viewer.css b/web/pdf_internal_viewer.css new file mode 100644 index 0000000000000..553ef45b15ac3 --- /dev/null +++ b/web/pdf_internal_viewer.css @@ -0,0 +1,304 @@ +/* Copyright 2026 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +:root { + color-scheme: light dark; +} +* { + box-sizing: border-box; +} +body { + font-family: "Courier New", Courier, monospace; + margin: 0; + padding: 16px; + background: light-dark(#fff, #1e1e1e); + color: light-dark(#1e1e1e, #d4d4d4); + font-size: 13px; + line-height: 1.5; +} +#header { + display: flex; + align-items: baseline; + justify-content: space-between; + margin-bottom: 12px; + + h1 { + color: light-dark(#0070c1, #9cdcfe); + font-size: 1.2em; + margin: 0; + } + + #pdf-info { + font-family: system-ui, sans-serif; + font-size: 1.15em; + font-weight: 500; + color: light-dark(#1e1e1e, #d4d4d4); + } +} +#password-dialog { + background: light-dark(#fff, #2d2d2d); + color: light-dark(#1e1e1e, #d4d4d4); + border: 1px solid light-dark(#ccc, #555); + border-radius: 6px; + padding: 20px; + min-width: 320px; + + &::backdrop { + background: rgb(0 0 0 / 0.4); + } + + p { + margin: 0 0 12px; + } + + input { + display: block; + width: 100%; + margin-top: 4px; + background: light-dark(#fff, #3c3c3c); + color: light-dark(#1e1e1e, #d4d4d4); + border: 1px solid light-dark(#c8c8c8, #555); + border-radius: 3px; + padding: 4px 8px; + font-family: inherit; + font-size: inherit; + } + + 
.password-dialog-buttons { + display: flex; + justify-content: flex-end; + gap: 8px; + margin-top: 16px; + + button { + padding: 4px 14px; + border-radius: 3px; + border: 1px solid light-dark(#c8c8c8, #555); + background: light-dark(#f3f3f3, #3c3c3c); + color: inherit; + cursor: pointer; + font-family: inherit; + font-size: inherit; + + &:hover { + background: light-dark(#e0e0e0, #4a4a4a); + } + } + } +} +#controls { + position: sticky; + top: 0; + z-index: 1; + display: flex; + flex-direction: row; + align-items: center; + gap: 12px; + margin-bottom: 16px; + padding: 10px 14px; + background: light-dark(#f3f3f3, #252526); + border-radius: 4px; + border: 1px solid light-dark(#e0e0e0, #3c3c3c); + + label { + display: flex; + align-items: center; + gap: 4px; + color: light-dark(#6e6e6e, #888); + } + + #github-link { + margin-inline-start: auto; + display: flex; + align-items: center; + color: light-dark(#6e6e6e, #aaa); + text-decoration: none; + + &:hover { + color: light-dark(#1e1e1e, #fff); + } + + svg { + width: 20px; + height: 20px; + fill: currentColor; + } + } +} +#goto-input { + background: light-dark(#fff, #3c3c3c); + color: light-dark(#1e1e1e, #d4d4d4); + border: 1px solid light-dark(#c8c8c8, #555); + border-radius: 3px; + padding: 2px 6px; + font-family: inherit; + font-size: inherit; + + &:disabled { + opacity: 0.4; + } + &[aria-invalid="true"] { + border-color: #f66; + } +} +#status { + color: light-dark(#6e6e6e, #888); + font-style: italic; +} +#tree { + padding: 8px 12px; + background: light-dark(#f3f3f3, #252526); + border-radius: 4px; + border: 1px solid light-dark(#e0e0e0, #3c3c3c); + min-height: 60px; + + .node { + display: block; + padding: 1px 0; + } + .key { + color: light-dark(#0070c1, #9cdcfe); + } + .separator { + color: light-dark(#6e6e6e, #888); + } + [role="button"] { + display: inline-block; + width: 14px; + font-size: 0.7em; + color: light-dark(#666, #aaa); + cursor: pointer; + user-select: none; + vertical-align: middle; + } + 
[role="group"] { + padding-left: 20px; + border-left: 1px dashed light-dark(#d0d0d0, #444); + margin-left: 2px; + + &.hidden { + display: none; + } + } + .ref { + color: light-dark(#007b6e, #4ec9b0); + cursor: pointer; + text-decoration: underline dotted; + + &:hover { + color: light-dark(#065, #89d9c8); + } + } + .str-value { + color: light-dark(#a31515, #ce9178); + } + .num-value { + color: light-dark(#098658, #b5cea8); + } + .bool-value { + color: light-dark(#00f, #569cd6); + } + .null-value { + color: light-dark(#767676, #808080); + } + .name-value { + color: light-dark(#795e26, #dcdcaa); + } + .bracket { + color: light-dark(#6e6e6e, #888); + cursor: pointer; + user-select: none; + + &:hover { + color: light-dark(#444, #bbb); + } + } + .stream-label { + color: light-dark(#af00db, #c586c0); + font-style: italic; + } + [role="status"] { + color: light-dark(#6e6e6e, #888); + font-style: italic; + } + [role="alert"] { + color: #f66; + } + .bytes-content { + padding-left: 20px; + white-space: pre-wrap; + font-size: 1em; + opacity: 0.85; + color: light-dark(#a31515, #ce9178); + } + .bytes-hex { + font-family: monospace; + color: light-dark(#00f, #569cd6); + } + .image-preview { + display: block; + margin-top: 4px; + max-width: 40%; + image-rendering: pixelated; + border: 1px solid light-dark(#ccc, #444); + } + .content-stream-parsed { + display: none; + } + .content-stream-raw { + display: inline; + } + &.parse-cs-active { + .content-stream-parsed { + display: inline; + } + .content-stream-raw { + display: none; + } + } + .content-stream { + line-height: 1.8; + } + .cs-instruction { + display: block; + white-space: nowrap; + } + .token-cmd { + color: light-dark(#0070c1, #9cdcfe); + font-weight: bold; + } + .token-num { + color: light-dark(#098658, #b5cea8); + } + .token-str { + color: light-dark(#a31515, #ce9178); + } + .token-name { + color: light-dark(#795e26, #dcdcaa); + } + .token-bool { + color: light-dark(#00f, #569cd6); + } + .token-null { + color: 
light-dark(#767676, #808080); + } + .token-ref { + color: light-dark(#007b6e, #4ec9b0); + } + .token-array, + .token-dict { + color: light-dark(#1e1e1e, #d4d4d4); + } +} diff --git a/web/pdf_internal_viewer.html b/web/pdf_internal_viewer.html new file mode 100644 index 0000000000000..b31ae57bdccce --- /dev/null +++ b/web/pdf_internal_viewer.html @@ -0,0 +1,82 @@ + + + + + + + PDF Internal Structure Viewer + + + + + +
+ + +
+

+ + +
+ + +
+
+
+ + + + + + + + + + diff --git a/web/pdf_internal_viewer.js b/web/pdf_internal_viewer.js new file mode 100644 index 0000000000000..6504b3cd597c9 --- /dev/null +++ b/web/pdf_internal_viewer.js @@ -0,0 +1,813 @@ +/* Copyright 2026 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { getDocument, GlobalWorkerOptions, PasswordResponses } from "pdfjs-lib"; + +GlobalWorkerOptions.workerSrc = + typeof PDFJSDev === "undefined" + ? "../src/pdf.worker.js" + : "../build/pdf.worker.mjs"; + +const ARROW_COLLAPSED = "▶"; +const ARROW_EXPANDED = "▼"; + +// Matches indirect object references such as "10 0 R". +const REF_RE = /^\d+ \d+ R$/; + +// Parses "num" into { page: num }, or "numR"/"numRgen" into { ref: {num,gen} }. +// Returns null for invalid input. +function parseGoToInput(str) { + const m = str.trim().match(/^(\d+)(R(\d+)?)?$/i); + if (!m) { + return null; + } + if (!m[2]) { + return { page: parseInt(m[1]) }; + } + return { + ref: { num: parseInt(m[1]), gen: m[3] !== undefined ? parseInt(m[3]) : 0 }, + }; +} + +// Parses "num", "numR" or "numRgen" into { num, gen }, or returns null. +// Used for URL hash param parsing where a bare number means a ref, not a page. +function parseRefInput(str) { + const m = str.trim().match(/^(\d+)(?:R(\d+)?)?$/i); + if (!m) { + return null; + } + return { num: parseInt(m[1]), gen: m[2] !== undefined ? 
parseInt(m[2]) : 0 }; +} + +let pdfDoc = null; + +// Cache for getRawData results, keyed by "num:gen". Cleared on each new +// document. +const refCache = new Map(); + +function updateParseCSClass() { + document + .getElementById("tree") + .classList.toggle( + "parse-cs-active", + document.getElementById("parse-content-stream").checked + ); +} + +async function loadTree(data, rootLabel = null) { + const treeEl = document.getElementById("tree"); + const rootNode = renderNode(rootLabel, await pdfDoc.getRawData(data), pdfDoc); + treeEl.replaceChildren(rootNode); + rootNode.querySelector("[role='button']").click(); +} + +async function openDocument(source, name) { + const statusEl = document.getElementById("status"); + const pdfInfoEl = document.getElementById("pdf-info"); + const gotoInput = document.getElementById("goto-input"); + + statusEl.textContent = `Loading ${name}…`; + pdfInfoEl.textContent = ""; + refCache.clear(); + + if (pdfDoc) { + await pdfDoc.destroy(); + pdfDoc = null; + } + + const loadingTask = getDocument({ ...source, wasmUrl: "wasm/" }); + loadingTask.onPassword = (updateCallback, reason) => { + const dialog = document.getElementById("password-dialog"); + const title = document.getElementById("password-dialog-title"); + const input = document.getElementById("password-input"); + const cancelBtn = document.getElementById("password-cancel"); + + title.textContent = + reason === PasswordResponses.INCORRECT_PASSWORD + ? "Incorrect password. Please try again:" + : "This PDF is password-protected. 
Please enter the password:"; + input.value = ""; + dialog.showModal(); + + const onSubmit = () => { + cleanup(); + updateCallback(input.value); + }; + const onCancel = () => { + cleanup(); + dialog.close(); + updateCallback(new Error("Password prompt cancelled.")); + }; + const cleanup = () => { + dialog.removeEventListener("close", onSubmit); + cancelBtn.removeEventListener("click", onCancel); + }; + + dialog.addEventListener("close", onSubmit, { once: true }); + cancelBtn.addEventListener("click", onCancel, { once: true }); + }; + pdfDoc = await loadingTask.promise; + const plural = pdfDoc.numPages !== 1 ? "s" : ""; + pdfInfoEl.textContent = `${name} — ${pdfDoc.numPages} page${plural}`; + statusEl.textContent = ""; + gotoInput.disabled = false; + gotoInput.value = ""; +} + +function showError(err) { + document.getElementById("status").textContent = "Error: " + err.message; + const msg = document.createElement("div"); + msg.setAttribute("role", "alert"); + msg.textContent = err.message; + document.getElementById("tree").append(msg); +} + +document.getElementById("file-input").value = ""; + +document + .getElementById("parse-content-stream") + .addEventListener("change", updateParseCSClass); + +updateParseCSClass(); + +document.getElementById("file-input").addEventListener("change", async e => { + const file = e.target.files[0]; + if (!file) { + return; + } + try { + await openDocument({ data: await file.arrayBuffer() }, file.name); + await loadTree({ ref: null }, "Trailer"); + } catch (err) { + showError(err); + } +}); + +(async () => { + const searchParams = new URLSearchParams(location.search); + const hashParams = new URLSearchParams(location.hash.slice(1)); + const fileUrl = searchParams.get("file"); + if (!fileUrl) { + return; + } + try { + await openDocument({ url: fileUrl }, fileUrl.split("/").pop()); + const refStr = hashParams.get("ref"); + const pageStr = hashParams.get("page"); + if (refStr) { + const ref = parseRefInput(refStr); + if (ref) { + 
document.getElementById("goto-input").value = refStr; + await loadTree({ ref }); + return; + } + } + if (pageStr) { + const page = parseInt(pageStr); + if (Number.isInteger(page) && page >= 1 && page <= pdfDoc.numPages) { + document.getElementById("goto-input").value = pageStr; + await loadTree({ page }); + return; + } + } + await loadTree({ ref: null }, "Trailer"); + } catch (err) { + showError(err); + } +})(); + +document.getElementById("goto-input").addEventListener("keydown", async e => { + if (e.key !== "Enter" || !pdfDoc) { + return; + } + const input = e.target; + if (input.value.trim() === "") { + input.setAttribute("aria-invalid", "false"); + await loadTree({ ref: null }, "Trailer"); + return; + } + const result = parseGoToInput(input.value); + if (!result) { + input.setAttribute("aria-invalid", "true"); + return; + } + if ( + result.page !== undefined && + (result.page < 1 || result.page > pdfDoc.numPages) + ) { + input.setAttribute("aria-invalid", "true"); + return; + } + input.setAttribute("aria-invalid", "false"); + await (result.page !== undefined + ? loadTree({ page: result.page }) + : loadTree({ ref: result.ref })); +}); + +document.getElementById("goto-input").addEventListener("input", e => { + if (e.target.value.trim() === "") { + e.target.setAttribute("aria-invalid", "false"); + } +}); + +// PDF Name objects arrive as { name: "..." } after structured clone. +function isPDFName(val) { + return ( + val !== null && + typeof val === "object" && + !Array.isArray(val) && + typeof val.name === "string" && + Object.keys(val).length === 1 + ); +} + +// Ref objects arrive as { num: N, gen: G } after structured clone. +function isRefObject(val) { + return ( + val !== null && + typeof val === "object" && + !Array.isArray(val) && + typeof val.num === "number" && + typeof val.gen === "number" && + Object.keys(val).length === 2 + ); +} + +function refLabel(ref) { + return ref.gen !== 0 ? 
`${ref.num}R${ref.gen}` : `${ref.num}R`; +} + +// Page content streams: +// { contentStream: true, instructions, cmdNames, rawContents }. +function isContentStream(val) { + return ( + val !== null && + typeof val === "object" && + val.contentStream === true && + Array.isArray(val.instructions) && + Array.isArray(val.rawContents) + ); +} + +// Streams: { dict, bytes }, { dict, imageData }, +// or { dict, contentStream: true, instructions, cmdNames } (Form XObject). +function isStream(val) { + return ( + val !== null && + typeof val === "object" && + !Array.isArray(val) && + Object.prototype.hasOwnProperty.call(val, "dict") && + (Object.prototype.hasOwnProperty.call(val, "bytes") || + Object.prototype.hasOwnProperty.call(val, "imageData") || + val.contentStream === true) + ); +} + +function isImageStream(val) { + return ( + isStream(val) && Object.prototype.hasOwnProperty.call(val, "imageData") + ); +} + +function isFormXObjectStream(val) { + return isStream(val) && val.contentStream === true; +} + +/** + * Render one key/value pair as a
+ * `div.node` element (role "treeitem").
+ * @param {string|null} key Dict key, array index, or null for root; when
+ *   null, no key/separator spans are emitted.
+ * @param {*} value Value rendered through renderValue().
+ * @param {PDFDocumentProxy} doc Forwarded unchanged to renderValue().
+ */
+function renderNode(key, value, doc) {
+  const row = document.createElement("div");
+  row.className = "node";
+  row.setAttribute("role", "treeitem");
+  row.tabIndex = -1;
+
+  if (key !== null) {
+    // Label first, then the ": " separator, both ahead of the value.
+    const label = document.createElement("span");
+    label.className = "key";
+    label.textContent = key;
+    const colon = document.createElement("span");
+    colon.className = "separator";
+    colon.textContent = ": ";
+    row.append(label, colon);
+  }
+
+  row.append(renderValue(value, doc));
+  return row;
+}
+
+/**
+ * Fill `container` with one child node per direct entry of `value`.
+ * Streams list their dict entries followed by an image preview, a parsed/raw
+ * content-stream pair (Form XObject) or a raw byte dump; arrays and plain
+ * objects list their items; anything else becomes a single key-less node.
+ */
+function buildChildren(value, doc, container) {
+  if (!isStream(value)) {
+    if (Array.isArray(value)) {
+      value.forEach((item, index) =>
+        container.append(renderNode(String(index), item, doc))
+      );
+    } else if (value !== null && typeof value === "object") {
+      for (const [name, child] of Object.entries(value)) {
+        container.append(renderNode(name, child, doc));
+      }
+    } else {
+      container.append(renderNode(null, value, doc));
+    }
+    return;
+  }
+
+  // Every stream flavour starts with its dictionary entries.
+  for (const [name, child] of Object.entries(value.dict)) {
+    container.append(renderNode(name, child, doc));
+  }
+
+  if (isImageStream(value)) {
+    container.append(renderImageData(value.imageData));
+    return;
+  }
+
+  if (isFormXObjectStream(value)) {
+    const contentRow = document.createElement("div");
+    contentRow.className = "node";
+    contentRow.setAttribute("role", "treeitem");
+    contentRow.tabIndex = -1;
+    contentRow.append(makeSpan("key", "content"), makeSpan("separator", ": "));
+
+    // Parsed view: expandable list of instructions.
+    const parsedView = document.createElement("span");
+    parsedView.className = "content-stream-parsed";
+    const instructionsWidget = renderExpandable(
+      `[Content Stream, ${value.instructions.length} instructions]`,
+      "stream-label",
+      target => buildInstructionLines(value, target)
+    );
+    parsedView.append(instructionsWidget);
+
+    // Raw view: byte-count label followed by the formatted bytes.
+    const rawView = document.createElement("span");
+    rawView.className = "content-stream-raw";
+    rawView.append(
+      makeSpan("stream-label", `<${value.bytes.length} raw bytes>`)
+    );
+    const rawBytes = document.createElement("div");
+    rawBytes.className = "bytes-content";
+    rawBytes.append(formatBytes(value.bytes));
+    rawView.append(rawBytes);
+
+    contentRow.append(parsedView, rawView);
+    container.append(contentRow);
+    return;
+  }
+
+  // Any other stream: a "bytes: <N raw bytes>" row plus the byte dump.
+  const bytesRow = document.createElement("div");
+  bytesRow.className = "node";
+  const parts = [
+    ["key", "bytes"],
+    ["separator", ": "],
+    ["stream-label", `<${value.bytes.length} raw bytes>`],
+  ];
+  for (const [className, text] of parts) {
+    const span = document.createElement("span");
+    span.className = className;
+    span.textContent = text;
+    bytesRow.append(span);
+  }
+  container.append(bytesRow);
+
+  const bytesDump = document.createElement("div");
+  bytesDump.className = "bytes-content";
+  bytesDump.append(formatBytes(value.bytes));
+  container.append(bytesDump);
+}
+
+/**
+ * Render a single content-stream token as a styled span.
+ */
+function renderToken(token) {
+  if (!token) {
+    return makeSpan("token-null", "null");
+  }
+  const { type, value } = token;
+
+  // Leaf tokens map straight onto one styled span.
+  if (type === "cmd") {
+    return makeSpan("token-cmd", value);
+  }
+  if (type === "name") {
+    return makeSpan("token-name", "/" + value);
+  }
+  if (type === "ref") {
+    return makeSpan("token-ref", `${token.num} ${token.gen} R`);
+  }
+  if (type === "number") {
+    return makeSpan("token-num", String(value));
+  }
+  if (type === "string") {
+    return makeSpan("token-str", JSON.stringify(value));
+  }
+  if (type === "boolean") {
+    return makeSpan("token-bool", String(value));
+  }
+  if (type === "null") {
+    return makeSpan("token-null", "null");
+  }
+
+  // Containers: opening bracket, space-separated children (rendered
+  // recursively), a trailing space, then the closing bracket.
+  if (type === "array" || type === "dict") {
+    const isArray = type === "array";
+    const wrapper = document.createElement("span");
+    wrapper.className = isArray ? "token-array" : "token-dict";
+    wrapper.append(makeSpan("bracket", isArray ? "[" : "<<"));
+    if (isArray) {
+      for (const item of value) {
+        wrapper.append(document.createTextNode(" "));
+        wrapper.append(renderToken(item));
+      }
+    } else {
+      for (const [entryKey, entryVal] of Object.entries(value)) {
+        wrapper.append(document.createTextNode(" "));
+        wrapper.append(makeSpan("token-name", "/" + entryKey));
+        wrapper.append(document.createTextNode(" "));
+        wrapper.append(renderToken(entryVal));
+      }
+    }
+    wrapper.append(document.createTextNode(" "));
+    wrapper.append(makeSpan("bracket", isArray ? "]" : ">>"));
+    return wrapper;
+  }
+
+  // Unrecognised token type: show its value, or the type name if none.
+  return makeSpan("token-unknown", String(value ?? type));
+}
+
+/**
+ * Populate container with one .cs-instruction div per instruction.
+ * Shared by Page content streams and Form XObject streams.
/**
 * Populate `container` with one `.cs-instruction` div per instruction.
 * Shared by Page content streams and Form XObject streams.
 *
 * Instructions nested inside a text object (BT … ET), a graphics-state
 * save/restore pair (q … Q) or a marked-content sequence (BMC/BDC … EMC) are
 * indented by 1.5em per nesting level.
 *
 * @param {Object} val - Parsed stream with `instructions` (array of
 *   `{ cmd, args }`) and `cmdNames` (operator → descriptive name).
 * @param {Element} container - Element receiving the `.content-stream` div.
 */
function buildInstructionLines(val, container) {
  // Both BMC and BDC open a marked-content sequence that EMC closes, so both
  // must increase the depth; otherwise EMC un-indents one level too many.
  const openers = new Set(["BT", "q", "BMC", "BDC"]);
  const closers = new Set(["ET", "Q", "EMC"]);

  const pre = document.createElement("div");
  pre.className = "content-stream";
  let depth = 0;
  for (const { cmd, args } of val.instructions) {
    // A closing operator is shown at the level of its matching opener.
    // Unbalanced closers are clamped at depth 0.
    if (closers.has(cmd)) {
      depth = Math.max(0, depth - 1);
    }
    const line = document.createElement("div");
    line.className = "cs-instruction";
    if (depth > 0) {
      line.style.paddingInlineStart = `${depth * 1.5}em`;
    }
    for (const arg of args) {
      line.append(renderToken(arg));
      line.append(document.createTextNode(" "));
    }
    if (cmd !== null) {
      const cmdEl = makeSpan("token-cmd", cmd);
      const opsName = val.cmdNames[cmd];
      // Only strings are valid tooltips; this also skips inherited
      // properties (e.g. an operator literally named "constructor").
      if (typeof opsName === "string" && opsName) {
        cmdEl.title = opsName;
      }
      line.append(cmdEl);
    }
    pre.append(line);
    if (openers.has(cmd)) {
      depth++;
    }
  }
  container.append(pre);
}

/**
 * Render Page content stream as two pre-built views toggled by CSS:
 *  - .content-stream-parsed: expandable colorized instruction widget
 *  - .content-stream-raw: ref widget(s) mirroring the unparsed display
 * The active view is controlled by the "parse-cs-active" class on #tree.
 *
 * @param {Object} val - Content-stream value with `instructions`, `cmdNames`
 *   and `rawContents` (array of the raw /Contents entries).
 * @param {Object} doc - Document handle forwarded to `renderValue`.
 * @returns {DocumentFragment} Fragment holding both views.
 */
function renderContentStream(val, doc) {
  const frag = document.createDocumentFragment();

  const parsedEl = document.createElement("span");
  parsedEl.className = "content-stream-parsed";
  parsedEl.append(
    renderExpandable(
      `[Content Stream, ${val.instructions.length} instructions]`,
      "stream-label",
      container => buildInstructionLines(val, container)
    )
  );

  const rawEl = document.createElement("span");
  rawEl.className = "content-stream-raw";
  // A single raw entry is shown directly rather than as a one-item array.
  const rawVal =
    val.rawContents.length === 1 ? val.rawContents[0] : val.rawContents;
  rawEl.append(renderValue(rawVal, doc));

  frag.append(parsedEl, rawEl);
  return frag;
}
/**
 * Produce the DOM for an arbitrary value: primitives become a single inline
 * span, everything else becomes an expandable widget.
 *
 * Checks run from most to least specific: refs, PDF names, page content
 * streams, streams, then plain dicts/arrays, and finally primitives.
 *
 * @param {*} value - Value to display.
 * @param {Object} doc - Document handle, passed on to nested renderers.
 * @returns {Node|DocumentFragment} Node suitable for appendChild().
 */
function renderValue(value, doc) {
  // References, given either as an "N G R" string or as a { num, gen }
  // object, expand lazily through getRawData().
  const looksLikeRefString = typeof value === "string" && REF_RE.test(value);
  if (looksLikeRefString || isRefObject(value)) {
    return renderRef(value, doc);
  }

  // PDF Name → /Name
  if (isPDFName(value)) {
    return makeSpan("name-value", "/" + value.name);
  }

  // Page Contents → two pre-built views toggled by CSS
  if (isContentStream(value)) {
    return renderContentStream(value, doc);
  }

  // Children of the remaining container kinds are all built the same way.
  const fillChildren = container => buildChildren(value, doc, container);

  // Stream → dict entries plus byte count or image preview
  if (isStream(value)) {
    return renderExpandable("[Stream]", "stream-label", fillChildren);
  }

  if (Array.isArray(value)) {
    const count = value.length;
    return count === 0
      ? makeSpan("bracket", "[]")
      : renderExpandable(`[${count}]`, "bracket", fillChildren);
  }

  // Any other non-null object is treated as a dict.
  if (value !== null && typeof value === "object") {
    const count = Object.keys(value).length;
    return count === 0
      ? makeSpan("bracket", "{}")
      : renderExpandable(`{${count}}`, "bracket", fillChildren);
  }

  switch (typeof value) {
    case "string":
      return makeSpan("str-value", JSON.stringify(value));
    case "number":
      return makeSpan("num-value", String(value));
    case "boolean":
      return makeSpan("bool-value", String(value));
    default:
      // null, undefined and anything else unhandled.
      return makeSpan("null-value", "null");
  }
}
/**
 * Build a lazy-loading expand/collapse widget for a ref (string or object).
 *
 * The referenced object is fetched with `doc.getRawData({ ref })` the first
 * time the widget is expanded. In-flight and successful fetches are shared
 * through `refCache`, keyed by "num:gen". A failed fetch is evicted from the
 * cache and the widget is marked as not loaded, so collapsing and expanding
 * again retries instead of showing the stale error forever.
 *
 * @param {string|{num: number, gen: number}} ref - "10 0 R" style string or
 *   ref object.
 * @param {Object} doc - Document handle exposing `getRawData()`.
 * @returns {DocumentFragment} Fragment with the toggle arrow, the ref label
 *   and the initially hidden children container.
 */
function renderRef(ref, doc) {
  // Derive the cache key and display label from whichever form we received.
  let cacheKey, label;
  if (typeof ref === "string") {
    const [num, gen] = ref.split(" ");
    cacheKey = `${num}:${gen}`;
    label = ref;
  } else {
    cacheKey = `${ref.num}:${ref.gen}`;
    label = refLabel(ref);
  }

  const frag = document.createDocumentFragment();

  const toggleEl = document.createElement("span");
  toggleEl.textContent = ARROW_COLLAPSED;
  toggleEl.setAttribute("role", "button");
  toggleEl.setAttribute("tabindex", "0");
  toggleEl.setAttribute("aria-expanded", "false");
  toggleEl.setAttribute("aria-label", `Expand reference ${label}`);

  const refEl = document.createElement("span");
  refEl.className = "ref";
  refEl.textContent = label;
  refEl.setAttribute("aria-hidden", "true");

  const childrenEl = document.createElement("div");
  childrenEl.className = "hidden";
  childrenEl.setAttribute("role", "group");
  childrenEl.setAttribute("aria-label", `Contents of reference ${label}`);

  let open = false;
  let loaded = false;

  // Replace whatever is in the children container with an error message.
  const showError = err => {
    const errEl = document.createElement("div");
    errEl.setAttribute("role", "alert");
    // Rejections are not guaranteed to be Error instances.
    errEl.textContent = "Error: " + (err?.message ?? String(err));
    childrenEl.replaceChildren(errEl);
  };

  const onToggle = async () => {
    open = !open;
    toggleEl.textContent = open ? ARROW_EXPANDED : ARROW_COLLAPSED;
    toggleEl.setAttribute("aria-expanded", String(open));
    childrenEl.classList.toggle("hidden", !open);

    if (!open || loaded) {
      return;
    }
    // Set before awaiting so that toggling while the fetch is pending does
    // not start a second load.
    loaded = true;
    const spinner = document.createElement("div");
    spinner.setAttribute("role", "status");
    spinner.textContent = "Loading…";
    // Also clears the error message left by a previous failed attempt.
    childrenEl.replaceChildren(spinner);

    let pending, result;
    try {
      if (!refCache.has(cacheKey)) {
        refCache.set(cacheKey, doc.getRawData({ ref }));
      }
      pending = refCache.get(cacheKey);
      result = await pending;
    } catch (err) {
      // Do not let a rejected promise poison the cache; only evict our own
      // entry in case another widget already replaced it.
      // NOTE(review): assumes refCache is a Map (has/get/set/delete).
      if (pending !== undefined && refCache.get(cacheKey) === pending) {
        refCache.delete(cacheKey);
      }
      loaded = false;
      showError(err);
      return;
    }

    try {
      childrenEl.replaceChildren();
      buildChildren(result, doc, childrenEl);
    } catch (err) {
      // The data itself was fetched fine, so keep it cached.
      showError(err);
    }
  };

  toggleEl.addEventListener("click", onToggle);
  toggleEl.addEventListener("keydown", e => {
    if (e.key === "Enter" || e.key === " ") {
      e.preventDefault();
      onToggle();
    }
  });
  refEl.addEventListener("click", onToggle);

  frag.append(toggleEl);
  frag.append(refEl);
  frag.append(childrenEl);
  return frag;
}
/**
 * Create an expand/collapse widget whose content is produced synchronously,
 * at most once, the first time it is opened.
 *
 * @param {string} label - Text shown on the collapsed line.
 * @param {string} labelClass - CSS class for the label.
 * @param {function} buildFn - Called with (containerEl) on first open.
 * @returns {DocumentFragment} Fragment holding the toggle arrow, the label
 *   and the initially hidden children container.
 */
function renderExpandable(label, labelClass, buildFn) {
  const arrow = document.createElement("span");
  arrow.textContent = ARROW_COLLAPSED;
  const arrowAttributes = [
    ["role", "button"],
    ["tabindex", "0"],
    ["aria-expanded", "false"],
    ["aria-label", `Expand ${label}`],
  ];
  for (const [attrName, attrValue] of arrowAttributes) {
    arrow.setAttribute(attrName, attrValue);
  }

  const caption = document.createElement("span");
  caption.className = labelClass;
  caption.textContent = label;
  caption.setAttribute("aria-hidden", "true");

  const body = document.createElement("div");
  body.className = "hidden";
  body.setAttribute("role", "group");
  body.setAttribute("aria-label", `Contents of ${label}`);

  let expanded = false;
  // Holds the builder until it has been used once, then null.
  let pendingBuild = buildFn;

  function flip() {
    expanded = !expanded;
    arrow.textContent = expanded ? ARROW_EXPANDED : ARROW_COLLAPSED;
    arrow.setAttribute("aria-expanded", String(expanded));
    body.classList.toggle("hidden", !expanded);
    if (!expanded || pendingBuild === null) {
      return;
    }
    // Cleared before the call so the builder never runs twice, even if it
    // throws.
    const build = pendingBuild;
    pendingBuild = null;
    build(body);
  }

  arrow.addEventListener("click", flip);
  arrow.addEventListener("keydown", event => {
    if (event.key !== "Enter" && event.key !== " ") {
      return;
    }
    event.preventDefault();
    flip();
  });
  caption.addEventListener("click", flip);

  const frag = document.createDocumentFragment();
  frag.append(arrow, caption, body);
  return frag;
}
/**
 * Render image data (RGBA Uint8ClampedArray) into a `.node` element holding
 * an "imageData: <W×H>" line followed by a <canvas> preview.
 *
 * @param {Object} image
 * @param {number} image.width - Width in pixels.
 * @param {number} image.height - Height in pixels.
 * @param {ArrayLike<number>} image.data - RGBA bytes; copied before use.
 * @returns {HTMLDivElement}
 */
function renderImageData({ width, height, data }) {
  const node = document.createElement("div");
  node.className = "node";
  const keyEl = document.createElement("span");
  keyEl.className = "key";
  keyEl.textContent = "imageData";
  const sep = document.createElement("span");
  sep.className = "separator";
  sep.textContent = ": ";
  const info = document.createElement("span");
  info.className = "stream-label";
  info.textContent = `<${width}×${height}>`;
  node.append(keyEl, sep, info);

  const canvas = document.createElement("canvas");
  canvas.className = "image-preview";
  canvas.width = width;
  canvas.height = height;
  canvas.setAttribute("aria-label", `Image preview ${width}×${height}`);
  const ctx = canvas.getContext("2d");
  const imgData = new ImageData(new Uint8ClampedArray(data), width, height);
  ctx.putImageData(imgData, 0, 0);
  node.append(canvas);
  return node;
}

/**
 * Heuristic: does this byte string look like text rather than binary data?
 *
 * A byte counts as text-like when it is printable ASCII (0x20–0x7e) or one of
 * tab, line feed or carriage return. Line breaks must count, otherwise text
 * made of short lines (e.g. "0 0 m\n1 1 l\nS\n") would be classified as
 * binary and dumped as hex.
 *
 * @param {string} str - Byte string (one char per byte).
 * @returns {boolean} True when at least 80% of a non-empty string is
 *   text-like.
 */
function isMostlyText(str) {
  let textLike = 0;
  for (let i = 0; i < str.length; i++) {
    const c = str.charCodeAt(i);
    if (
      (c >= 0x20 && c <= 0x7e) ||
      c === 0x09 ||
      c === 0x0a ||
      c === 0x0d
    ) {
      textLike++;
    }
  }
  return str.length > 0 && textLike / str.length >= 0.8;
}

/**
 * Build a DocumentFragment for the byte string.
 *
 * Binary content (see isMostlyText) becomes a single
 * <span class="bytes-hex"> with every byte as uppercase two-digit hex.
 * For text content, runs of printable ASCII (0x20–0x7e) and line feeds become
 * plain text nodes, and each run of other bytes becomes one
 * <span class="bytes-hex">. Hex bytes are separated by a middle dot followed
 * by a zero-width space, so long runs can wrap.
 *
 * @param {string} str - Byte string (one char per byte).
 * @returns {DocumentFragment}
 */
function formatBytes(str) {
  const frag = document.createDocumentFragment();

  // Hex dump of str[start, end) wrapped in a single span.
  const hexSpan = (start, end) => {
    const hexParts = [];
    for (let j = start; j < end; j++) {
      hexParts.push(
        str.charCodeAt(j).toString(16).toUpperCase().padStart(2, "0")
      );
    }
    const span = document.createElement("span");
    span.className = "bytes-hex";
    span.textContent = hexParts.join("\u00B7\u200B");
    return span;
  };

  if (!isMostlyText(str)) {
    // Binary content: render every byte as hex in a single span.
    frag.append(hexSpan(0, str.length));
    return frag;
  }

  // Text content: printable ASCII + 0x0A as-is, other bytes as hex spans.
  const isPrintable = c => (c >= 0x20 && c <= 0x7e) || c === 0x0a;
  let i = 0;
  while (i < str.length) {
    const start = i;
    const printable = isPrintable(str.charCodeAt(i));
    // Extend the run while the bytes stay in the same category.
    do {
      i++;
    } while (i < str.length && isPrintable(str.charCodeAt(i)) === printable);
    frag.append(
      printable
        ? document.createTextNode(str.slice(start, i))
        : hexSpan(start, i)
    );
  }
  return frag;
}

/**
 * Create a <span> with the given class and text content.
 *
 * @param {string} className - Value for the span's `className`.
 * @param {string} text - Value for the span's `textContent`.
 * @returns {HTMLSpanElement}
 */
function makeSpan(className, text) {
  const span = document.createElement("span");
  span.className = className;
  span.textContent = text;
  return span;
}