diff --git a/gulpfile.mjs b/gulpfile.mjs index 4dd951834bd71..fb67966b184a2 100644 --- a/gulpfile.mjs +++ b/gulpfile.mjs @@ -65,6 +65,7 @@ const IMAGE_DECODERS_LEGACY_DIR = BUILD_DIR + "image_decoders-legacy/"; const DEFAULT_PREFERENCES_DIR = BUILD_DIR + "default_preferences/"; const MINIFIED_DIR = BUILD_DIR + "minified/"; const MINIFIED_LEGACY_DIR = BUILD_DIR + "minified-legacy/"; +const INTERNAL_VIEWER_DIR = BUILD_DIR + "internal-viewer/"; const JSDOC_BUILD_DIR = BUILD_DIR + "jsdoc/"; const GH_PAGES_DIR = BUILD_DIR + "gh-pages/"; const DIST_DIR = BUILD_DIR + "dist/"; @@ -2368,6 +2369,52 @@ gulp.task("check_l10n", function (done) { }); }); +function createInternalViewerBundle(defines) { + const viewerFileConfig = createWebpackConfig(defines, { + filename: "pdf_internal_viewer.mjs", + library: { + type: "module", + }, + }); + return gulp + .src("./web/pdf_internal_viewer.js", { encoding: false }) + .pipe(webpack2Stream(viewerFileConfig)); +} + +function buildInternalViewer(defines, dir) { + fs.rmSync(dir, { recursive: true, force: true }); + + return ordered([ + createMainBundle(defines).pipe(gulp.dest(dir + "build")), + createWorkerBundle(defines).pipe(gulp.dest(dir + "build")), + createInternalViewerBundle(defines).pipe(gulp.dest(dir + "web")), + preprocessHTML("web/pdf_internal_viewer.html", defines).pipe( + gulp.dest(dir + "web") + ), + preprocessCSS("web/pdf_internal_viewer.css", defines) + .pipe( + postcss([ + postcssDirPseudoClass(), + discardCommentsCSS(), + postcssNesting(), + postcssLightDarkFunction({ preserve: true }), + autoprefixer(AUTOPREFIXER_CONFIG), + ]) + ) + .pipe(gulp.dest(dir + "web")), + createWasmBundle().pipe(gulp.dest(dir + "web/wasm")), + ]); +} + +gulp.task( + "internal-viewer", + gulp.series(createBuildNumber, function createInternalViewer() { + console.log("\n### Creating internal viewer"); + const defines = { ...DEFINES, GENERIC: true }; + return buildInternalViewer(defines, INTERNAL_VIEWER_DIR); + }) +); + function ghPagesPrepare() { 
console.log("\n### Creating web site"); @@ -2391,6 +2438,13 @@ function ghPagesPrepare() { gulp .src(JSDOC_BUILD_DIR + "**/*", { base: JSDOC_BUILD_DIR, encoding: false }) .pipe(gulp.dest(GH_PAGES_DIR + "api/draft/")), + gulp + .src(INTERNAL_VIEWER_DIR + "**/*", { + base: INTERNAL_VIEWER_DIR, + encoding: false, + removeBOM: false, + }) + .pipe(gulp.dest(GH_PAGES_DIR + "internal-viewer/")), ]); } @@ -2442,6 +2496,7 @@ gulp.task( gulp.series( "generic", "generic-legacy", + "internal-viewer", "jsdoc", ghPagesPrepare, "metalsmith" diff --git a/src/core/document.js b/src/core/document.js index a4d447f598efb..f0a291e9548d8 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -22,6 +22,7 @@ import { isArrayEqual, makeArr, objectSize, + OPS, PageActionEventType, RenderingIntentFlag, shadow, @@ -37,6 +38,17 @@ import { PopupAnnotation, WidgetAnnotation, } from "./annotation.js"; +import { + Cmd, + Dict, + EOF, + isName, + isRefsEqual, + Name, + Ref, + RefSet, + RefSetCache, +} from "./primitives.js"; import { collectActions, getInheritableProperty, @@ -51,16 +63,9 @@ import { XRefEntryException, XRefParseException, } from "./core_utils.js"; -import { - Dict, - isName, - isRefsEqual, - Name, - Ref, - RefSet, - RefSetCache, -} from "./primitives.js"; +import { EvaluatorPreprocessor, PartialEvaluator } from "./evaluator.js"; import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js"; +import { Lexer, Linearization, Parser } from "./parser.js"; import { NullStream, Stream } from "./stream.js"; import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./calculate_md5.js"; @@ -68,10 +73,11 @@ import { Catalog } from "./catalog.js"; import { clearGlobalCaches } from "./cleanup_helper.js"; import { DatasetReader } from "./dataset_reader.js"; import { Intersector } from "./intersector.js"; -import { Linearization } from "./parser.js"; +import { LocalColorSpaceCache } from "./image_utils.js"; import { ObjectLoader } from "./object_loader.js"; import 
{ OperatorList } from "./operator_list.js"; -import { PartialEvaluator } from "./evaluator.js"; +import { PDFFunctionFactory } from "./function.js"; +import { PDFImage } from "./image.js"; import { StreamsSequenceStream } from "./decode_stream.js"; import { StructTreePage } from "./struct_tree.js"; import { XFAFactory } from "./xfa/factory.js"; @@ -2030,6 +2036,219 @@ class PDFDocument { AnnotationFactory.createGlobals(this.pdfManager) ); } + + async toJSObject(value, firstCall = true) { + if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { + throw new Error("Not implemented: toJSObject"); + } + + if (value === null && firstCall) { + return this.toJSObject(this.xref.trailer, false); + } + if (value instanceof Dict) { + const obj = Object.create(null); + const isPage = isName(value.get("Type"), "Page"); + for (const [key, val] of value.getRawEntries()) { + obj[key] = + isPage && key === "Contents" + ? _getContentTokens(val, this.xref) + : await this.toJSObject(val, false); + } + return obj; + } + if (Array.isArray(value)) { + return Promise.all(value.map(v => this.toJSObject(v, false))); + } + if (value instanceof Ref) { + if (firstCall) { + return this.toJSObject(this.xref.fetch(value), false); + } + const result = Object.create(null); + result.num = value.num; + result.gen = value.gen; + return result; + } + if (value instanceof BaseStream) { + const { dict } = value; + const obj = Object.create(null); + obj.dict = await this.toJSObject(dict, false); + + if ( + isName(dict.get("Type"), "XObject") && + isName(dict.get("Subtype"), "Image") + ) { + try { + const pdfFunctionFactory = new PDFFunctionFactory({ + xref: this.xref, + isEvalSupported: this.pdfManager.evaluatorOptions.isEvalSupported, + }); + const imageObj = await PDFImage.buildImage({ + xref: this.xref, + res: Dict.empty, + image: value, + pdfFunctionFactory, + globalColorSpaceCache: this.catalog.globalColorSpaceCache, + localColorSpaceCache: new LocalColorSpaceCache(), + }); + const 
imgData = await imageObj.createImageData( + /* forceRGBA = */ true, + /* isOffscreenCanvasSupported = */ false + ); + obj.imageData = { + width: imgData.width, + height: imgData.height, + kind: imgData.kind, + data: imgData.data, + }; + return obj; + } catch { + // Fall through to regular byte stream if image decoding fails. + } + } + + if (isName(dict.get("Subtype"), "Form")) { + obj.bytes = value.getString(); + value.reset(); + const { instructions, cmdNames } = _groupIntoInstructions( + _tokenizeStream(value, this.xref) + ); + obj.contentStream = true; + obj.instructions = instructions; + obj.cmdNames = cmdNames; + return obj; + } + + obj.bytes = value.getString(); + return obj; + } + return value; + } +} + +function _tokenizeStream(stream, xref) { + const tokens = []; + const parser = new Parser({ + lexer: new Lexer(stream), + xref, + allowStreams: false, + }); + while (true) { + let obj; + try { + obj = parser.getObj(); + } catch { + break; + } + if (obj === EOF) { + break; + } + const token = _tokenToJSObject(obj); + if (token !== null) { + tokens.push(token); + } + } + return tokens; +} + +function _getContentTokens(contentsVal, xref) { + const refs = Array.isArray(contentsVal) ? contentsVal : [contentsVal]; + const rawContents = []; + const tokens = []; + for (const rawRef of refs) { + if (rawRef instanceof Ref) { + rawContents.push({ num: rawRef.num, gen: rawRef.gen }); + } + const stream = xref.fetchIfRef(rawRef); + if (!(stream instanceof BaseStream)) { + continue; + } + tokens.push(..._tokenizeStream(stream, xref)); + } + const { instructions, cmdNames } = _groupIntoInstructions(tokens); + return { contentStream: true, instructions, cmdNames, rawContents }; +} + +// Lazily-built reverse map: OPS numeric id → property name string. 
+let _opsIdToName = null; + +function _getOpsIdToName() { + if (!_opsIdToName) { + _opsIdToName = Object.create(null); + for (const [name, id] of Object.entries(OPS)) { + _opsIdToName[id] = name; + } + } + return _opsIdToName; +} + +function _groupIntoInstructions(tokens) { + const { opMap } = EvaluatorPreprocessor; + const opsIdToName = _getOpsIdToName(); + const instructions = []; + const cmdNames = Object.create(null); + const argBuffer = []; + for (const token of tokens) { + if (token.type !== "cmd") { + argBuffer.push(token); + continue; + } + const op = opMap[token.value]; + if (op && !(token.value in cmdNames)) { + cmdNames[token.value] = opsIdToName[op.id]; + } + let args; + if (!op || op.variableArgs) { + // Unknown command or variable args: consume all pending args. + args = argBuffer.splice(0); + } else { + // Fixed args: consume exactly numArgs, orphan the rest. + const orphanCount = Math.max(0, argBuffer.length - op.numArgs); + for (let i = 0; i < orphanCount; i++) { + instructions.push({ cmd: null, args: [argBuffer.shift()] }); + } + args = argBuffer.splice(0); + } + instructions.push({ cmd: token.value, args }); + } + for (const t of argBuffer) { + instructions.push({ cmd: null, args: [t] }); + } + return { instructions, cmdNames }; +} + +function _tokenToJSObject(obj) { + if (obj instanceof Cmd) { + return { type: "cmd", value: obj.cmd }; + } + if (obj instanceof Name) { + return { type: "name", value: obj.name }; + } + if (obj instanceof Ref) { + return { type: "ref", num: obj.num, gen: obj.gen }; + } + if (Array.isArray(obj)) { + return { type: "array", value: obj.map(_tokenToJSObject) }; + } + if (obj instanceof Dict) { + const result = Object.create(null); + for (const [key, val] of obj.getRawEntries()) { + result[key] = _tokenToJSObject(val); + } + return { type: "dict", value: result }; + } + if (typeof obj === "number") { + return { type: "number", value: obj }; + } + if (typeof obj === "string") { + return { type: "string", value: obj }; + } 
+ if (typeof obj === "boolean") { + return { type: "boolean", value: obj }; + } + if (obj === null) { + return { type: "null" }; + } + return null; } export { Page, PDFDocument }; diff --git a/src/core/worker.js b/src/core/worker.js index 29aa80cf82a68..59e65e9865d39 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -952,6 +952,22 @@ class WorkerMessageHandler { return pdfManager.fontFallback(data.id, handler); }); + handler.on("GetRawData", async function ({ ref, page }) { + if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { + throw new Error("Not implemented: GetRawData"); + } + let value = null; + if (page >= 1) { + value = (await pdfManager.ensureCatalog("getPageDict", [page - 1]))[1]; + } else if (ref) { + value = + typeof ref === "string" + ? Ref.fromString(ref) + : Ref.get(ref.num, ref.gen); + } + return pdfManager.ensureDoc("toJSObject", [value]); + }); + handler.on("Cleanup", function (data) { return pdfManager.cleanup(/* manuallyTriggered = */ true); }); diff --git a/src/display/api.js b/src/display/api.js index f77b572722bce..dfada5fd1f0f2 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -1065,6 +1065,10 @@ class PDFDocumentProxy { return this._transport.downloadInfoCapability.promise; } + getRawData(data) { + return this._transport.getRawData(data); + } + /** * Cleans up resources allocated by the document on both the main and worker * threads. @@ -3173,6 +3177,10 @@ class WorkerTransport { return this.messageHandler.sendWithPromise("GetMarkInfo", null); } + getRawData(data) { + return this.messageHandler.sendWithPromise("GetRawData", data); + } + async startCleanup(keepLoadedFonts = false) { if (this.destroyed) { return; // No need to manually clean-up when destruction has started. 
diff --git a/web/pdf_internal_viewer.css b/web/pdf_internal_viewer.css new file mode 100644 index 0000000000000..553ef45b15ac3 --- /dev/null +++ b/web/pdf_internal_viewer.css @@ -0,0 +1,304 @@ +/* Copyright 2026 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +:root { + color-scheme: light dark; +} +* { + box-sizing: border-box; +} +body { + font-family: "Courier New", Courier, monospace; + margin: 0; + padding: 16px; + background: light-dark(#fff, #1e1e1e); + color: light-dark(#1e1e1e, #d4d4d4); + font-size: 13px; + line-height: 1.5; +} +#header { + display: flex; + align-items: baseline; + justify-content: space-between; + margin-bottom: 12px; + + h1 { + color: light-dark(#0070c1, #9cdcfe); + font-size: 1.2em; + margin: 0; + } + + #pdf-info { + font-family: system-ui, sans-serif; + font-size: 1.15em; + font-weight: 500; + color: light-dark(#1e1e1e, #d4d4d4); + } +} +#password-dialog { + background: light-dark(#fff, #2d2d2d); + color: light-dark(#1e1e1e, #d4d4d4); + border: 1px solid light-dark(#ccc, #555); + border-radius: 6px; + padding: 20px; + min-width: 320px; + + &::backdrop { + background: rgb(0 0 0 / 0.4); + } + + p { + margin: 0 0 12px; + } + + input { + display: block; + width: 100%; + margin-top: 4px; + background: light-dark(#fff, #3c3c3c); + color: light-dark(#1e1e1e, #d4d4d4); + border: 1px solid light-dark(#c8c8c8, #555); + border-radius: 3px; + padding: 4px 8px; + font-family: inherit; + font-size: inherit; + } + + 
.password-dialog-buttons { + display: flex; + justify-content: flex-end; + gap: 8px; + margin-top: 16px; + + button { + padding: 4px 14px; + border-radius: 3px; + border: 1px solid light-dark(#c8c8c8, #555); + background: light-dark(#f3f3f3, #3c3c3c); + color: inherit; + cursor: pointer; + font-family: inherit; + font-size: inherit; + + &:hover { + background: light-dark(#e0e0e0, #4a4a4a); + } + } + } +} +#controls { + position: sticky; + top: 0; + z-index: 1; + display: flex; + flex-direction: row; + align-items: center; + gap: 12px; + margin-bottom: 16px; + padding: 10px 14px; + background: light-dark(#f3f3f3, #252526); + border-radius: 4px; + border: 1px solid light-dark(#e0e0e0, #3c3c3c); + + label { + display: flex; + align-items: center; + gap: 4px; + color: light-dark(#6e6e6e, #888); + } + + #github-link { + margin-inline-start: auto; + display: flex; + align-items: center; + color: light-dark(#6e6e6e, #aaa); + text-decoration: none; + + &:hover { + color: light-dark(#1e1e1e, #fff); + } + + svg { + width: 20px; + height: 20px; + fill: currentColor; + } + } +} +#goto-input { + background: light-dark(#fff, #3c3c3c); + color: light-dark(#1e1e1e, #d4d4d4); + border: 1px solid light-dark(#c8c8c8, #555); + border-radius: 3px; + padding: 2px 6px; + font-family: inherit; + font-size: inherit; + + &:disabled { + opacity: 0.4; + } + &[aria-invalid="true"] { + border-color: #f66; + } +} +#status { + color: light-dark(#6e6e6e, #888); + font-style: italic; +} +#tree { + padding: 8px 12px; + background: light-dark(#f3f3f3, #252526); + border-radius: 4px; + border: 1px solid light-dark(#e0e0e0, #3c3c3c); + min-height: 60px; + + .node { + display: block; + padding: 1px 0; + } + .key { + color: light-dark(#0070c1, #9cdcfe); + } + .separator { + color: light-dark(#6e6e6e, #888); + } + [role="button"] { + display: inline-block; + width: 14px; + font-size: 0.7em; + color: light-dark(#666, #aaa); + cursor: pointer; + user-select: none; + vertical-align: middle; + } + 
[role="group"] { + padding-left: 20px; + border-left: 1px dashed light-dark(#d0d0d0, #444); + margin-left: 2px; + + &.hidden { + display: none; + } + } + .ref { + color: light-dark(#007b6e, #4ec9b0); + cursor: pointer; + text-decoration: underline dotted; + + &:hover { + color: light-dark(#065, #89d9c8); + } + } + .str-value { + color: light-dark(#a31515, #ce9178); + } + .num-value { + color: light-dark(#098658, #b5cea8); + } + .bool-value { + color: light-dark(#00f, #569cd6); + } + .null-value { + color: light-dark(#767676, #808080); + } + .name-value { + color: light-dark(#795e26, #dcdcaa); + } + .bracket { + color: light-dark(#6e6e6e, #888); + cursor: pointer; + user-select: none; + + &:hover { + color: light-dark(#444, #bbb); + } + } + .stream-label { + color: light-dark(#af00db, #c586c0); + font-style: italic; + } + [role="status"] { + color: light-dark(#6e6e6e, #888); + font-style: italic; + } + [role="alert"] { + color: #f66; + } + .bytes-content { + padding-left: 20px; + white-space: pre-wrap; + font-size: 1em; + opacity: 0.85; + color: light-dark(#a31515, #ce9178); + } + .bytes-hex { + font-family: monospace; + color: light-dark(#00f, #569cd6); + } + .image-preview { + display: block; + margin-top: 4px; + max-width: 40%; + image-rendering: pixelated; + border: 1px solid light-dark(#ccc, #444); + } + .content-stream-parsed { + display: none; + } + .content-stream-raw { + display: inline; + } + &.parse-cs-active { + .content-stream-parsed { + display: inline; + } + .content-stream-raw { + display: none; + } + } + .content-stream { + line-height: 1.8; + } + .cs-instruction { + display: block; + white-space: nowrap; + } + .token-cmd { + color: light-dark(#0070c1, #9cdcfe); + font-weight: bold; + } + .token-num { + color: light-dark(#098658, #b5cea8); + } + .token-str { + color: light-dark(#a31515, #ce9178); + } + .token-name { + color: light-dark(#795e26, #dcdcaa); + } + .token-bool { + color: light-dark(#00f, #569cd6); + } + .token-null { + color: 
light-dark(#767676, #808080); + } + .token-ref { + color: light-dark(#007b6e, #4ec9b0); + } + .token-array, + .token-dict { + color: light-dark(#1e1e1e, #d4d4d4); + } +} diff --git a/web/pdf_internal_viewer.html b/web/pdf_internal_viewer.html new file mode 100644 index 0000000000000..b31ae57bdccce --- /dev/null +++ b/web/pdf_internal_viewer.html @@ -0,0 +1,82 @@ + + + + + + + PDF Internal Structure Viewer + + + + + +
+ + +
+

+ + +
+ + +
+
+
+ + + + + + + + + + diff --git a/web/pdf_internal_viewer.js b/web/pdf_internal_viewer.js new file mode 100644 index 0000000000000..6504b3cd597c9 --- /dev/null +++ b/web/pdf_internal_viewer.js @@ -0,0 +1,813 @@ +/* Copyright 2026 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { getDocument, GlobalWorkerOptions, PasswordResponses } from "pdfjs-lib"; + +GlobalWorkerOptions.workerSrc = + typeof PDFJSDev === "undefined" + ? "../src/pdf.worker.js" + : "../build/pdf.worker.mjs"; + +const ARROW_COLLAPSED = "▶"; +const ARROW_EXPANDED = "▼"; + +// Matches indirect object references such as "10 0 R". +const REF_RE = /^\d+ \d+ R$/; + +// Parses "num" into { page: num }, or "numR"/"numRgen" into { ref: {num,gen} }. +// Returns null for invalid input. +function parseGoToInput(str) { + const m = str.trim().match(/^(\d+)(R(\d+)?)?$/i); + if (!m) { + return null; + } + if (!m[2]) { + return { page: parseInt(m[1]) }; + } + return { + ref: { num: parseInt(m[1]), gen: m[3] !== undefined ? parseInt(m[3]) : 0 }, + }; +} + +// Parses "num", "numR" or "numRgen" into { num, gen }, or returns null. +// Used for URL hash param parsing where a bare number means a ref, not a page. +function parseRefInput(str) { + const m = str.trim().match(/^(\d+)(?:R(\d+)?)?$/i); + if (!m) { + return null; + } + return { num: parseInt(m[1]), gen: m[2] !== undefined ? 
parseInt(m[2]) : 0 }; +} + +let pdfDoc = null; + +// Cache for getRawData results, keyed by "num:gen". Cleared on each new +// document. +const refCache = new Map(); + +function updateParseCSClass() { + document + .getElementById("tree") + .classList.toggle( + "parse-cs-active", + document.getElementById("parse-content-stream").checked + ); +} + +async function loadTree(data, rootLabel = null) { + const treeEl = document.getElementById("tree"); + const rootNode = renderNode(rootLabel, await pdfDoc.getRawData(data), pdfDoc); + treeEl.replaceChildren(rootNode); + rootNode.querySelector("[role='button']").click(); +} + +async function openDocument(source, name) { + const statusEl = document.getElementById("status"); + const pdfInfoEl = document.getElementById("pdf-info"); + const gotoInput = document.getElementById("goto-input"); + + statusEl.textContent = `Loading ${name}…`; + pdfInfoEl.textContent = ""; + refCache.clear(); + + if (pdfDoc) { + await pdfDoc.destroy(); + pdfDoc = null; + } + + const loadingTask = getDocument({ ...source, wasmUrl: "wasm/" }); + loadingTask.onPassword = (updateCallback, reason) => { + const dialog = document.getElementById("password-dialog"); + const title = document.getElementById("password-dialog-title"); + const input = document.getElementById("password-input"); + const cancelBtn = document.getElementById("password-cancel"); + + title.textContent = + reason === PasswordResponses.INCORRECT_PASSWORD + ? "Incorrect password. Please try again:" + : "This PDF is password-protected. 
Please enter the password:"; + input.value = ""; + dialog.showModal(); + + const onSubmit = () => { + cleanup(); + updateCallback(input.value); + }; + const onCancel = () => { + cleanup(); + dialog.close(); + updateCallback(new Error("Password prompt cancelled.")); + }; + const cleanup = () => { + dialog.removeEventListener("close", onSubmit); + cancelBtn.removeEventListener("click", onCancel); + }; + + dialog.addEventListener("close", onSubmit, { once: true }); + cancelBtn.addEventListener("click", onCancel, { once: true }); + }; + pdfDoc = await loadingTask.promise; + const plural = pdfDoc.numPages !== 1 ? "s" : ""; + pdfInfoEl.textContent = `${name} — ${pdfDoc.numPages} page${plural}`; + statusEl.textContent = ""; + gotoInput.disabled = false; + gotoInput.value = ""; +} + +function showError(err) { + document.getElementById("status").textContent = "Error: " + err.message; + const msg = document.createElement("div"); + msg.setAttribute("role", "alert"); + msg.textContent = err.message; + document.getElementById("tree").append(msg); +} + +document.getElementById("file-input").value = ""; + +document + .getElementById("parse-content-stream") + .addEventListener("change", updateParseCSClass); + +updateParseCSClass(); + +document.getElementById("file-input").addEventListener("change", async e => { + const file = e.target.files[0]; + if (!file) { + return; + } + try { + await openDocument({ data: await file.arrayBuffer() }, file.name); + await loadTree({ ref: null }, "Trailer"); + } catch (err) { + showError(err); + } +}); + +(async () => { + const searchParams = new URLSearchParams(location.search); + const hashParams = new URLSearchParams(location.hash.slice(1)); + const fileUrl = searchParams.get("file"); + if (!fileUrl) { + return; + } + try { + await openDocument({ url: fileUrl }, fileUrl.split("/").pop()); + const refStr = hashParams.get("ref"); + const pageStr = hashParams.get("page"); + if (refStr) { + const ref = parseRefInput(refStr); + if (ref) { + 
document.getElementById("goto-input").value = refStr; + await loadTree({ ref }); + return; + } + } + if (pageStr) { + const page = parseInt(pageStr); + if (Number.isInteger(page) && page >= 1 && page <= pdfDoc.numPages) { + document.getElementById("goto-input").value = pageStr; + await loadTree({ page }); + return; + } + } + await loadTree({ ref: null }, "Trailer"); + } catch (err) { + showError(err); + } +})(); + +document.getElementById("goto-input").addEventListener("keydown", async e => { + if (e.key !== "Enter" || !pdfDoc) { + return; + } + const input = e.target; + if (input.value.trim() === "") { + input.setAttribute("aria-invalid", "false"); + await loadTree({ ref: null }, "Trailer"); + return; + } + const result = parseGoToInput(input.value); + if (!result) { + input.setAttribute("aria-invalid", "true"); + return; + } + if ( + result.page !== undefined && + (result.page < 1 || result.page > pdfDoc.numPages) + ) { + input.setAttribute("aria-invalid", "true"); + return; + } + input.setAttribute("aria-invalid", "false"); + await (result.page !== undefined + ? loadTree({ page: result.page }) + : loadTree({ ref: result.ref })); +}); + +document.getElementById("goto-input").addEventListener("input", e => { + if (e.target.value.trim() === "") { + e.target.setAttribute("aria-invalid", "false"); + } +}); + +// PDF Name objects arrive as { name: "..." } after structured clone. +function isPDFName(val) { + return ( + val !== null && + typeof val === "object" && + !Array.isArray(val) && + typeof val.name === "string" && + Object.keys(val).length === 1 + ); +} + +// Ref objects arrive as { num: N, gen: G } after structured clone. +function isRefObject(val) { + return ( + val !== null && + typeof val === "object" && + !Array.isArray(val) && + typeof val.num === "number" && + typeof val.gen === "number" && + Object.keys(val).length === 2 + ); +} + +function refLabel(ref) { + return ref.gen !== 0 ? 
`${ref.num}R${ref.gen}` : `${ref.num}R`; +} + +// Page content streams: +// { contentStream: true, instructions, cmdNames, rawContents }. +function isContentStream(val) { + return ( + val !== null && + typeof val === "object" && + val.contentStream === true && + Array.isArray(val.instructions) && + Array.isArray(val.rawContents) + ); +} + +// Streams: { dict, bytes }, { dict, imageData }, +// or { dict, contentStream: true, instructions, cmdNames } (Form XObject). +function isStream(val) { + return ( + val !== null && + typeof val === "object" && + !Array.isArray(val) && + Object.prototype.hasOwnProperty.call(val, "dict") && + (Object.prototype.hasOwnProperty.call(val, "bytes") || + Object.prototype.hasOwnProperty.call(val, "imageData") || + val.contentStream === true) + ); +} + +function isImageStream(val) { + return ( + isStream(val) && Object.prototype.hasOwnProperty.call(val, "imageData") + ); +} + +function isFormXObjectStream(val) { + return isStream(val) && val.contentStream === true; +} + +/** + * Render one key/value pair as a
. + * @param {string|null} key Dict key, array index, or null for root. + * @param {*} value + * @param {PDFDocumentProxy} doc + */ +function renderNode(key, value, doc) { + const node = document.createElement("div"); + node.className = "node"; + node.setAttribute("role", "treeitem"); + node.tabIndex = -1; + + if (key !== null) { + const keyEl = document.createElement("span"); + keyEl.className = "key"; + keyEl.textContent = key; + node.append(keyEl); + const sep = document.createElement("span"); + sep.className = "separator"; + sep.textContent = ": "; + node.append(sep); + } + + node.append(renderValue(value, doc)); + return node; +} + +/** + * Populate a container element with the direct children of a value. + * Used both by renderValue (inside expandables) and renderRef (directly + * into the ref's children container, avoiding an extra toggle level). + */ +function buildChildren(value, doc, container) { + if (isStream(value)) { + for (const [k, v] of Object.entries(value.dict)) { + container.append(renderNode(k, v, doc)); + } + if (isImageStream(value)) { + container.append(renderImageData(value.imageData)); + } else if (isFormXObjectStream(value)) { + const contentNode = document.createElement("div"); + contentNode.className = "node"; + contentNode.setAttribute("role", "treeitem"); + contentNode.tabIndex = -1; + contentNode.append(makeSpan("key", "content")); + contentNode.append(makeSpan("separator", ": ")); + + const parsedEl = document.createElement("span"); + parsedEl.className = "content-stream-parsed"; + parsedEl.append( + renderExpandable( + `[Content Stream, ${value.instructions.length} instructions]`, + "stream-label", + c => buildInstructionLines(value, c) + ) + ); + + const rawEl = document.createElement("span"); + rawEl.className = "content-stream-raw"; + const byteLabel = makeSpan( + "stream-label", + `<${value.bytes.length} raw bytes>` + ); + rawEl.append(byteLabel); + const bytesContentEl = document.createElement("div"); + 
bytesContentEl.className = "bytes-content"; + bytesContentEl.append(formatBytes(value.bytes)); + rawEl.append(bytesContentEl); + + contentNode.append(parsedEl, rawEl); + container.append(contentNode); + } else { + const byteNode = document.createElement("div"); + byteNode.className = "node"; + const keyEl = document.createElement("span"); + keyEl.className = "key"; + keyEl.textContent = "bytes"; + const sep = document.createElement("span"); + sep.className = "separator"; + sep.textContent = ": "; + const valEl = document.createElement("span"); + valEl.className = "stream-label"; + valEl.textContent = `<${value.bytes.length} raw bytes>`; + byteNode.append(keyEl, sep, valEl); + container.append(byteNode); + + const bytesContentEl = document.createElement("div"); + bytesContentEl.className = "bytes-content"; + bytesContentEl.append(formatBytes(value.bytes)); + container.append(bytesContentEl); + } + } else if (Array.isArray(value)) { + value.forEach((v, i) => container.append(renderNode(String(i), v, doc))); + } else if (value !== null && typeof value === "object") { + for (const [k, v] of Object.entries(value)) { + container.append(renderNode(k, v, doc)); + } + } else { + container.append(renderNode(null, value, doc)); + } +} + +/** + * Render a single content-stream token as a styled span. 
+ */ +function renderToken(token) { + if (!token) { + return makeSpan("token-null", "null"); + } + switch (token.type) { + case "cmd": + return makeSpan("token-cmd", token.value); + case "name": + return makeSpan("token-name", "/" + token.value); + case "ref": + return makeSpan("token-ref", `${token.num} ${token.gen} R`); + case "number": + return makeSpan("token-num", String(token.value)); + case "string": + return makeSpan("token-str", JSON.stringify(token.value)); + case "boolean": + return makeSpan("token-bool", String(token.value)); + case "null": + return makeSpan("token-null", "null"); + case "array": { + const span = document.createElement("span"); + span.className = "token-array"; + span.append(makeSpan("bracket", "[")); + for (const item of token.value) { + span.append(document.createTextNode(" ")); + span.append(renderToken(item)); + } + span.append(document.createTextNode(" ")); + span.append(makeSpan("bracket", "]")); + return span; + } + case "dict": { + const span = document.createElement("span"); + span.className = "token-dict"; + span.append(makeSpan("bracket", "<<")); + for (const [k, v] of Object.entries(token.value)) { + span.append(document.createTextNode(" ")); + span.append(makeSpan("token-name", "/" + k)); + span.append(document.createTextNode(" ")); + span.append(renderToken(v)); + } + span.append(document.createTextNode(" ")); + span.append(makeSpan("bracket", ">>")); + return span; + } + default: + return makeSpan("token-unknown", String(token.value ?? token.type)); + } +} + +/** + * Populate container with one .cs-instruction div per instruction. + * Shared by Page content streams and Form XObject streams. 
+ */ +function buildInstructionLines(val, container) { + const pre = document.createElement("div"); + pre.className = "content-stream"; + let depth = 0; + for (const instr of val.instructions) { + if (instr.cmd === "ET" || instr.cmd === "Q" || instr.cmd === "EMC") { + depth = Math.max(0, depth - 1); + } + const line = document.createElement("div"); + line.className = "cs-instruction"; + if (depth > 0) { + line.style.paddingInlineStart = `${depth * 1.5}em`; + } + for (const arg of instr.args) { + line.append(renderToken(arg)); + line.append(document.createTextNode(" ")); + } + if (instr.cmd !== null) { + const cmdEl = makeSpan("token-cmd", instr.cmd); + const opsName = val.cmdNames[instr.cmd]; + if (opsName) { + cmdEl.title = opsName; + } + line.append(cmdEl); + } + pre.append(line); + if (instr.cmd === "BT" || instr.cmd === "q" || instr.cmd === "BDC") { + depth++; + } + } + container.append(pre); +} + +/** + * Render Page content stream as two pre-built views toggled by CSS: + * - .content-stream-parsed: expandable colorized instruction widget + * - .content-stream-raw: ref widget(s) mirroring the unparsed display + * The active view is controlled by the "parse-cs-active" class on #tree. + */ +function renderContentStream(val, doc) { + const frag = document.createDocumentFragment(); + + const parsedEl = document.createElement("span"); + parsedEl.className = "content-stream-parsed"; + parsedEl.append( + renderExpandable( + `[Content Stream, ${val.instructions.length} instructions]`, + "stream-label", + container => buildInstructionLines(val, container) + ) + ); + + const rawEl = document.createElement("span"); + rawEl.className = "content-stream-raw"; + const rawVal = + val.rawContents.length === 1 ? val.rawContents[0] : val.rawContents; + rawEl.append(renderValue(rawVal, doc)); + + frag.append(parsedEl, rawEl); + return frag; +} + +/** + * Render a value inline (primitive) or as an expandable widget. 
/**
 * Render a value inline (primitive) or as an expandable widget.
 * Returns a Node or DocumentFragment suitable for appendChild().
 *
 * Checks run from most to least specific: refs, PDF names, content
 * streams, streams, arrays, plain objects, then primitives. Anything left
 * over (null, undefined, ...) is shown as "null".
 *
 * @param {*} value Value to display.
 * @param {Object} doc Document handle forwarded to nested renderers.
 * @returns {Node|DocumentFragment}
 */
function renderValue(value, doc) {
  // Ref string ("10 0 R") or ref object { num, gen }: lazy expandable.
  if (
    (typeof value === "string" && REF_RE.test(value)) ||
    isRefObject(value)
  ) {
    return renderRef(value, doc);
  }

  // PDF Name -> /Name
  if (isPDFName(value)) {
    return makeSpan("name-value", "/" + value.name);
  }

  // Content stream (Page Contents): two pre-built views toggled by CSS.
  if (isContentStream(value)) {
    return renderContentStream(value, doc);
  }

  // Every container kind below fills its children the same way.
  const fillChildren = container => buildChildren(value, doc, container);

  // Stream: expandable showing dict entries + byte count or image preview.
  if (isStream(value)) {
    return renderExpandable("[Stream]", "stream-label", fillChildren);
  }

  if (Array.isArray(value)) {
    return value.length === 0
      ? makeSpan("bracket", "[]")
      : renderExpandable(`[${value.length}]`, "bracket", fillChildren);
  }

  // Plain object (dict); arrays were handled just above.
  if (value !== null && typeof value === "object") {
    const { length: keyCount } = Object.keys(value);
    return keyCount === 0
      ? makeSpan("bracket", "{}")
      : renderExpandable(`{${keyCount}}`, "bracket", fillChildren);
  }

  switch (typeof value) {
    case "string":
      return makeSpan("str-value", JSON.stringify(value));
    case "number":
      return makeSpan("num-value", String(value));
    case "boolean":
      return makeSpan("bool-value", String(value));
    default:
      return makeSpan("null-value", "null");
  }
}
/**
 * Build a lazy-loading expand/collapse widget for a ref (string or object).
 * Results are cached in `refCache` keyed by "num:gen".
 *
 * The referenced data is fetched with doc.getRawData({ ref }) on first
 * expansion. The cache holds the promise, so widgets for the same ref
 * share one request. If the fetch fails, the cache entry is dropped and
 * the widget is marked unloaded, so collapsing and re-expanding retries.
 *
 * @param {string|{num: number, gen: number}} ref Ref as "10 0 R" or as an
 *   object with `num` and `gen`.
 * @param {Object} doc Document handle providing getRawData(); also
 *   forwarded to buildChildren().
 * @returns {DocumentFragment} Fragment with toggle arrow, label and the
 *   (initially hidden) children container, in that order.
 */
function renderRef(ref, doc) {
  // Derive the cache key and display label from whichever form we received.
  let cacheKey, label;
  if (typeof ref === "string") {
    const [num, gen] = ref.split(" ");
    cacheKey = `${num}:${gen}`;
    label = ref;
  } else {
    cacheKey = `${ref.num}:${ref.gen}`;
    label = refLabel(ref);
  }

  const toggleEl = document.createElement("span");
  toggleEl.textContent = ARROW_COLLAPSED;
  toggleEl.setAttribute("role", "button");
  toggleEl.setAttribute("tabindex", "0");
  toggleEl.setAttribute("aria-expanded", "false");
  toggleEl.setAttribute("aria-label", `Expand reference ${label}`);

  // The toggle already announces the label, so hide the duplicate text.
  const refEl = document.createElement("span");
  refEl.className = "ref";
  refEl.textContent = label;
  refEl.setAttribute("aria-hidden", "true");

  const childrenEl = document.createElement("div");
  childrenEl.className = "hidden";
  childrenEl.setAttribute("role", "group");
  childrenEl.setAttribute("aria-label", `Contents of reference ${label}`);

  let open = false;
  let loaded = false;

  const showError = err => {
    const errEl = document.createElement("div");
    errEl.setAttribute("role", "alert");
    // Rejections are not guaranteed to be Error instances.
    errEl.textContent = "Error: " + (err?.message ?? String(err));
    childrenEl.replaceChildren(errEl);
  };

  const onToggle = async () => {
    open = !open;
    toggleEl.textContent = open ? ARROW_EXPANDED : ARROW_COLLAPSED;
    toggleEl.setAttribute("aria-expanded", String(open));
    childrenEl.classList.toggle("hidden", !open);

    if (!open || loaded) {
      return;
    }
    // Set before awaiting so rapid toggling cannot start a second load.
    loaded = true;

    const spinner = document.createElement("div");
    spinner.setAttribute("role", "status");
    spinner.textContent = "Loading…";
    // Replace rather than append: a previous attempt may have left an error.
    childrenEl.replaceChildren(spinner);

    let pending;
    let fetched = false;
    try {
      if (!refCache.has(cacheKey)) {
        refCache.set(cacheKey, doc.getRawData({ ref }));
      }
      pending = refCache.get(cacheKey);
      const result = await pending;
      fetched = true;
      childrenEl.replaceChildren();
      buildChildren(result, doc, childrenEl);
    } catch (err) {
      if (!fetched) {
        // Don't keep a rejected promise cached. Only evict our own entry,
        // in case another widget already stored a fresh request.
        if (pending !== undefined && refCache.get(cacheKey) === pending) {
          refCache.delete(cacheKey);
        }
        loaded = false;
      }
      showError(err);
    }
  };

  toggleEl.addEventListener("click", onToggle);
  toggleEl.addEventListener("keydown", e => {
    if (e.key === "Enter" || e.key === " ") {
      e.preventDefault();
      onToggle();
    }
  });
  refEl.addEventListener("click", onToggle);

  const frag = document.createDocumentFragment();
  frag.append(toggleEl, refEl, childrenEl);
  return frag;
}
/**
 * Build a synchronous expand/collapse widget.
 *
 * The children container starts hidden and empty; `buildFn` runs once, the
 * first time the widget is opened. The arrow responds to click, Enter and
 * Space; the label responds to click.
 *
 * @param {string}   label       Text shown on the collapsed line.
 * @param {string}   labelClass  CSS class for the label.
 * @param {function} buildFn     Called with (containerEl) on first open.
 * @returns {DocumentFragment} Fragment with arrow, label and children
 *   container, in that order.
 */
function renderExpandable(label, labelClass, buildFn) {
  const setAttributes = (el, attributes) => {
    for (const [attrName, attrValue] of Object.entries(attributes)) {
      el.setAttribute(attrName, attrValue);
    }
  };

  const toggleEl = document.createElement("span");
  toggleEl.textContent = ARROW_COLLAPSED;
  setAttributes(toggleEl, {
    role: "button",
    tabindex: "0",
    "aria-expanded": "false",
    "aria-label": `Expand ${label}`,
  });

  // The toggle already announces the label, so hide the duplicate text.
  const labelEl = document.createElement("span");
  labelEl.className = labelClass;
  labelEl.textContent = label;
  setAttributes(labelEl, { "aria-hidden": "true" });

  const childrenEl = document.createElement("div");
  childrenEl.className = "hidden";
  setAttributes(childrenEl, {
    role: "group",
    "aria-label": `Contents of ${label}`,
  });

  let isOpen = false;
  let isBuilt = false;

  const onToggle = () => {
    isOpen = !isOpen;
    toggleEl.textContent = isOpen ? ARROW_EXPANDED : ARROW_COLLAPSED;
    toggleEl.setAttribute("aria-expanded", String(isOpen));
    childrenEl.classList.toggle("hidden", !isOpen);

    if (!isOpen || isBuilt) {
      return;
    }
    isBuilt = true;
    buildFn(childrenEl);
  };

  const onKeydown = e => {
    if (e.key !== "Enter" && e.key !== " ") {
      return;
    }
    e.preventDefault();
    onToggle();
  };

  toggleEl.addEventListener("click", onToggle);
  toggleEl.addEventListener("keydown", onKeydown);
  labelEl.addEventListener("click", onToggle);

  const frag = document.createDocumentFragment();
  frag.append(toggleEl, labelEl, childrenEl);
  return frag;
}

/**
 * NOTE: this comment describes formatBytes(), defined further below.
 *
 * Build a DocumentFragment for the byte string.
 * In mostly-text input, printable ASCII (0x20–0x7e) runs become plain text
 * nodes. Consecutive non-printable bytes are grouped into a single
 * <span class="bytes-hex"> with each byte as uppercase XX separated by
 * a middle dot followed by a zero-width space.
 */
/**
 * Render image data (RGBA Uint8ClampedArray) into a node.
 *
 * The node holds an "imageData: <W×H>" line followed by a canvas of the
 * same pixel size onto which the data is drawn.
 *
 * @param {Object} image
 * @param {number} image.width Width in pixels.
 * @param {number} image.height Height in pixels.
 * @param {ArrayLike<number>} image.data Pixel bytes; copied into a fresh
 *   Uint8ClampedArray before being handed to ImageData.
 * @returns {HTMLDivElement}
 */
function renderImageData({ width, height, data }) {
  const node = document.createElement("div");
  node.className = "node";
  node.append(
    makeSpan("key", "imageData"),
    makeSpan("separator", ": "),
    makeSpan("stream-label", `<${width}×${height}>`)
  );

  const canvas = document.createElement("canvas");
  canvas.className = "image-preview";
  canvas.width = width;
  canvas.height = height;
  canvas.setAttribute("aria-label", `Image preview ${width}×${height}`);
  const ctx = canvas.getContext("2d");
  const imgData = new ImageData(new Uint8ClampedArray(data), width, height);
  ctx.putImageData(imgData, 0, 0);
  node.append(canvas);
  return node;
}

/** Separator between hex bytes: middle dot + zero-width space. */
const BYTES_HEX_SEPARATOR = "\u00B7\u200B";

/**
 * Whether a byte is displayed verbatim: printable ASCII (0x20–0x7e) or
 * line feed (0x0a).
 *
 * @param {number} code Char code of the byte.
 * @returns {boolean}
 */
function isTextByte(code) {
  return (code >= 0x20 && code <= 0x7e) || code === 0x0a;
}

/**
 * Format str[start, end) as uppercase two-digit hex bytes joined by
 * BYTES_HEX_SEPARATOR.
 *
 * @param {string} str Byte string (one char per byte).
 * @param {number} start First index, inclusive.
 * @param {number} end Last index, exclusive.
 * @returns {string}
 */
function bytesToHex(str, start, end) {
  const hexParts = [];
  for (let i = start; i < end; i++) {
    hexParts.push(
      str.charCodeAt(i).toString(16).toUpperCase().padStart(2, "0")
    );
  }
  return hexParts.join(BYTES_HEX_SEPARATOR);
}

/**
 * Heuristic: true when at least 80% of the bytes are ones formatBytes()
 * shows verbatim. Line feeds count as text, so a stream of many short
 * lines is not mistaken for binary. An empty string is not text.
 *
 * @param {string} str Byte string (one char per byte).
 * @returns {boolean}
 */
function isMostlyText(str) {
  const { length } = str;
  if (length === 0) {
    return false;
  }
  let textual = 0;
  for (let i = 0; i < length; i++) {
    if (isTextByte(str.charCodeAt(i))) {
      textual++;
    }
  }
  return textual / length >= 0.8;
}

/**
 * Build a DocumentFragment for the byte string.
 *
 * Binary content (see isMostlyText) becomes one <span class="bytes-hex">
 * with every byte in hex. In text content, runs of printable ASCII and
 * 0x0A become plain text nodes, and each run of other bytes becomes one
 * <span class="bytes-hex">.
 *
 * @param {string} str Byte string (one char per byte).
 * @returns {DocumentFragment}
 */
function formatBytes(str) {
  const frag = document.createDocumentFragment();
  const appendHex = (start, end) => {
    frag.append(makeSpan("bytes-hex", bytesToHex(str, start, end)));
  };

  if (!isMostlyText(str)) {
    // Binary content: render every byte as hex in a single span.
    appendHex(0, str.length);
    return frag;
  }

  // Text content: split into alternating text and non-text runs.
  let i = 0;
  while (i < str.length) {
    const runStart = i;
    const runIsText = isTextByte(str.charCodeAt(i));
    while (i < str.length && isTextByte(str.charCodeAt(i)) === runIsText) {
      i++;
    }
    if (runIsText) {
      frag.append(document.createTextNode(str.slice(runStart, i)));
    } else {
      appendHex(runStart, i);
    }
  }
  return frag;
}

/**
 * Create a <span> with the given class and text content.
 *
 * @param {string} className Value for the span's `className`.
 * @param {string} text Value for the span's `textContent`.
 * @returns {HTMLSpanElement}
 */
function makeSpan(className, text) {
  const span = document.createElement("span");
  span.className = className;
  span.textContent = text;
  return span;
}