+
+
+
+
+ Enter a page number (e.g. 5), a reference as numR (e.g. 10R) or numRgen (e.g. 10R2). Press Enter to navigate.
+
+ Select a PDF file to explore its internal structure.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/web/pdf_internal_viewer.js b/web/pdf_internal_viewer.js
new file mode 100644
index 0000000000000..6504b3cd597c9
--- /dev/null
+++ b/web/pdf_internal_viewer.js
@@ -0,0 +1,813 @@
+/* Copyright 2026 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { getDocument, GlobalWorkerOptions, PasswordResponses } from "pdfjs-lib";
+
+// Tell the library where its worker script lives: the source-tree worker when
+// the PDFJSDev global is not defined, the built worker otherwise.
+GlobalWorkerOptions.workerSrc =
+ typeof PDFJSDev === "undefined"
+ ? "../src/pdf.worker.js"
+ : "../build/pdf.worker.mjs";
+
+// Glyphs shown on a toggle button while its subtree is collapsed / expanded.
+const ARROW_COLLAPSED = "▶";
+const ARROW_EXPANDED = "▼";
+
+// Matches indirect object references such as "10 0 R".
+const REF_RE = /^\d+ \d+ R$/;
+
+/**
+ * Parse the text typed into the "go to" box.
+ *
+ * Accepted forms (surrounding whitespace is ignored and the "R" is matched
+ * case-insensitively):
+ *   - "num"     → { page: num }
+ *   - "numR"    → { ref: { num, gen: 0 } }
+ *   - "numRgen" → { ref: { num, gen } }
+ *
+ * @param {string} str Raw user input.
+ * @returns {{page: number}|{ref: {num: number, gen: number}}|null} The parsed
+ *   target, or null when the input matches none of the accepted forms.
+ */
+function parseGoToInput(str) {
+  const m = str.trim().match(/^(\d+)(R(\d+)?)?$/i);
+  if (!m) {
+    return null;
+  }
+  // The captured groups are decimal digits only; state the radix explicitly.
+  const num = Number.parseInt(m[1], 10);
+  if (!m[2]) {
+    return { page: num };
+  }
+  return {
+    ref: { num, gen: m[3] !== undefined ? Number.parseInt(m[3], 10) : 0 },
+  };
+}
+
+/**
+ * Parse a reference written as "num", "numR" or "numRgen".
+ * Used for URL hash param parsing where a bare number means a ref, not a page.
+ *
+ * @param {string} str Raw parameter value; surrounding whitespace is ignored
+ *   and the "R" is matched case-insensitively.
+ * @returns {{num: number, gen: number}|null} The reference (gen defaults to
+ *   0 when omitted), or null when the input is malformed.
+ */
+function parseRefInput(str) {
+  const m = str.trim().match(/^(\d+)(?:R(\d+)?)?$/i);
+  if (!m) {
+    return null;
+  }
+  // The captured groups are decimal digits only; state the radix explicitly.
+  return {
+    num: Number.parseInt(m[1], 10),
+    gen: m[2] !== undefined ? Number.parseInt(m[2], 10) : 0,
+  };
+}
+
+// The currently open document. It is null before the first successful load
+// and between destroying the previous document and finishing the next load.
+let pdfDoc = null;
+
+// Cache for getRawData results, keyed by "num:gen". Cleared on each new
+// document. The stored value is whatever getRawData returned, un-awaited;
+// readers await it when they fetch it from the cache.
+const refCache = new Map();
+
+/**
+ * Mirror the "parse-content-stream" checkbox onto the tree element: the
+ * "parse-cs-active" class is present exactly while the box is checked.
+ */
+function updateParseCSClass() {
+  const treeEl = document.getElementById("tree");
+  const parseCheckbox = document.getElementById("parse-content-stream");
+  treeEl.classList.toggle("parse-cs-active", parseCheckbox.checked);
+}
+
+/**
+ * Fetch raw data from the open document, render it as the single root of the
+ * tree, and expand the root when it is expandable.
+ *
+ * @param {Object} data Request forwarded unchanged to pdfDoc.getRawData();
+ *   callers in this file pass { ref } or { page }.
+ * @param {string|null} [rootLabel] Key shown on the root node, or null for
+ *   none.
+ * @returns {Promise<void>} Rejects when getRawData() rejects.
+ */
+async function loadTree(data, rootLabel = null) {
+  const treeEl = document.getElementById("tree");
+  const rawData = await pdfDoc.getRawData(data);
+  const rootNode = renderNode(rootLabel, rawData, pdfDoc);
+  treeEl.replaceChildren(rootNode);
+  // Primitive values, empty dicts and empty arrays render without a toggle
+  // button, so there may be nothing to expand.
+  rootNode.querySelector("[role='button']")?.click();
+}
+
+/**
+ * Open a document, replacing (and destroying) the one currently open.
+ * Updates the "status" and "pdf-info" elements, clears the ref cache and,
+ * once loading has succeeded, enables and empties the "goto-input" box.
+ *
+ * @param {Object} source Options spread into the getDocument() call (callers
+ *   in this file pass { data } or { url }); wasmUrl: "wasm/" is added.
+ * @param {string} name Display name used in the status and info lines.
+ * @returns {Promise<void>} Rejects when the loading task rejects.
+ */
+async function openDocument(source, name) {
+ const statusEl = document.getElementById("status");
+ const pdfInfoEl = document.getElementById("pdf-info");
+ const gotoInput = document.getElementById("goto-input");
+
+ statusEl.textContent = `Loading ${name}…`;
+ pdfInfoEl.textContent = "";
+ refCache.clear();
+
+ if (pdfDoc) {
+ await pdfDoc.destroy();
+ pdfDoc = null;
+ }
+
+ const loadingTask = getDocument({ ...source, wasmUrl: "wasm/" });
+ // Password prompt: shows the modal dialog and reports the outcome through
+ // updateCallback.
+ loadingTask.onPassword = (updateCallback, reason) => {
+ const dialog = document.getElementById("password-dialog");
+ const title = document.getElementById("password-dialog-title");
+ const input = document.getElementById("password-input");
+ const cancelBtn = document.getElementById("password-cancel");
+
+ title.textContent =
+ reason === PasswordResponses.INCORRECT_PASSWORD
+ ? "Incorrect password. Please try again:"
+ : "This PDF is password-protected. Please enter the password:";
+ input.value = "";
+ dialog.showModal();
+
+ // Runs when the dialog closes: hands the typed password to the library.
+ const onSubmit = () => {
+ cleanup();
+ updateCallback(input.value);
+ };
+ // cleanup() runs before dialog.close() so that the resulting "close" event
+ // does not reach onSubmit.
+ const onCancel = () => {
+ cleanup();
+ dialog.close();
+ updateCallback(new Error("Password prompt cancelled."));
+ };
+ // Detach both listeners so that only one of the two outcomes is reported.
+ const cleanup = () => {
+ dialog.removeEventListener("close", onSubmit);
+ cancelBtn.removeEventListener("click", onCancel);
+ };
+
+ dialog.addEventListener("close", onSubmit, { once: true });
+ cancelBtn.addEventListener("click", onCancel, { once: true });
+ };
+ pdfDoc = await loadingTask.promise;
+ const plural = pdfDoc.numPages !== 1 ? "s" : "";
+ pdfInfoEl.textContent = `${name} — ${pdfDoc.numPages} page${plural}`;
+ statusEl.textContent = "";
+ gotoInput.disabled = false;
+ gotoInput.value = "";
+}
+
+/**
+ * Report a failure in two places: as "Error: <message>" in the status
+ * element, and as an alert element appended to the tree.
+ *
+ * @param {Error} err Only its `message` property is read.
+ */
+function showError(err) {
+  const { message } = err;
+  document.getElementById("status").textContent = "Error: " + message;
+
+  const alertEl = document.createElement("div");
+  alertEl.setAttribute("role", "alert");
+  alertEl.textContent = message;
+  document.getElementById("tree").append(alertEl);
+}
+
+// Clear the file picker's value when the module runs.
+document.getElementById("file-input").value = "";
+
+// Keep the tree's "parse-cs-active" class in step with the checkbox ...
+document
+ .getElementById("parse-content-stream")
+ .addEventListener("change", updateParseCSClass);
+
+// ... including its state at load time.
+updateParseCSClass();
+
+// Opening a local file: read it fully into memory, load it, and show the
+// trailer. Any failure along the way is reported through showError().
+document.getElementById("file-input").addEventListener("change", async e => {
+ const file = e.target.files[0];
+ if (!file) {
+ return;
+ }
+ try {
+ await openDocument({ data: await file.arrayBuffer() }, file.name);
+ await loadTree({ ref: null }, "Trailer");
+ } catch (err) {
+ showError(err);
+ }
+});
+
+// Startup: when the page URL has a ?file= parameter, open that document and
+// show the target named in the hash. A valid #ref= wins over #page=; when
+// neither yields a usable target the trailer is shown. Failures are reported
+// through showError().
+(async () => {
+  const searchParams = new URLSearchParams(location.search);
+  const hashParams = new URLSearchParams(location.hash.slice(1));
+  const fileUrl = searchParams.get("file");
+  if (!fileUrl) {
+    return;
+  }
+  try {
+    await openDocument({ url: fileUrl }, fileUrl.split("/").pop());
+    const refStr = hashParams.get("ref");
+    const pageStr = hashParams.get("page");
+    if (refStr) {
+      const ref = parseRefInput(refStr);
+      if (ref) {
+        document.getElementById("goto-input").value = refStr;
+        await loadTree({ ref });
+        return;
+      }
+    }
+    if (pageStr) {
+      // Explicit radix: without it a value such as "0x10" is read as
+      // hexadecimal and would silently navigate to page 16.
+      const page = Number.parseInt(pageStr, 10);
+      if (Number.isInteger(page) && page >= 1 && page <= pdfDoc.numPages) {
+        document.getElementById("goto-input").value = pageStr;
+        await loadTree({ page });
+        return;
+      }
+    }
+    await loadTree({ ref: null }, "Trailer");
+  } catch (err) {
+    showError(err);
+  }
+})();
+
+// Pressing Enter in the "go to" box navigates the tree:
+//   - empty input           → the trailer,
+//   - a page number         → that page (must lie within 1..numPages),
+//   - "numR" / "numRgen"    → that indirect object.
+// Unparseable or out-of-range input is flagged through aria-invalid. A
+// failure while loading is reported through showError(), like every other
+// loadTree() call site, instead of becoming an unhandled rejection.
+document.getElementById("goto-input").addEventListener("keydown", async e => {
+  if (e.key !== "Enter" || !pdfDoc) {
+    return;
+  }
+  const input = e.target;
+  try {
+    if (input.value.trim() === "") {
+      input.setAttribute("aria-invalid", "false");
+      await loadTree({ ref: null }, "Trailer");
+      return;
+    }
+    const result = parseGoToInput(input.value);
+    if (!result) {
+      input.setAttribute("aria-invalid", "true");
+      return;
+    }
+    if (
+      result.page !== undefined &&
+      (result.page < 1 || result.page > pdfDoc.numPages)
+    ) {
+      input.setAttribute("aria-invalid", "true");
+      return;
+    }
+    input.setAttribute("aria-invalid", "false");
+    await (result.page !== undefined
+      ? loadTree({ page: result.page })
+      : loadTree({ ref: result.ref }));
+  } catch (err) {
+    showError(err);
+  }
+});
+
+// Once the "go to" box has been emptied there is nothing left to flag, so
+// the invalid marker is reset.
+document.getElementById("goto-input").addEventListener("input", event => {
+  const box = event.target;
+  const isBlank = box.value.trim() === "";
+  if (!isBlank) {
+    return;
+  }
+  box.setAttribute("aria-invalid", "false");
+});
+
+/**
+ * Tell whether a value is a PDF Name, which arrives as { name: "..." } after
+ * structured clone: a non-array object whose only key is a string `name`.
+ *
+ * @param {*} val
+ * @returns {boolean}
+ */
+function isPDFName(val) {
+  if (val === null || typeof val !== "object" || Array.isArray(val)) {
+    return false;
+  }
+  if (typeof val.name !== "string") {
+    return false;
+  }
+  return Object.keys(val).length === 1;
+}
+
+/**
+ * Tell whether a value is a reference, which arrives as { num: N, gen: G }
+ * after structured clone: a non-array object with exactly two keys, both
+ * `num` and `gen` being numbers.
+ *
+ * @param {*} val
+ * @returns {boolean}
+ */
+function isRefObject(val) {
+  if (val === null || typeof val !== "object" || Array.isArray(val)) {
+    return false;
+  }
+  if (typeof val.num !== "number" || typeof val.gen !== "number") {
+    return false;
+  }
+  return Object.keys(val).length === 2;
+}
+
+/**
+ * Short display label for a reference: "numR", with the generation appended
+ * only when it is not 0 ("numRgen").
+ *
+ * @param {{num: number, gen: number}} ref
+ * @returns {string}
+ */
+function refLabel(ref) {
+  if (ref.gen === 0) {
+    return `${ref.num}R`;
+  }
+  return `${ref.num}R${ref.gen}`;
+}
+
+/**
+ * Tell whether a value is a Page content stream:
+ * { contentStream: true, instructions, cmdNames, rawContents }.
+ * Only the flag and the two arrays are checked.
+ *
+ * @param {*} val
+ * @returns {boolean}
+ */
+function isContentStream(val) {
+  if (val === null || typeof val !== "object") {
+    return false;
+  }
+  if (val.contentStream !== true) {
+    return false;
+  }
+  return Array.isArray(val.instructions) && Array.isArray(val.rawContents);
+}
+
+/**
+ * Tell whether a value is a stream. Streams arrive as { dict, bytes },
+ * { dict, imageData }, or { dict, contentStream: true, instructions,
+ * cmdNames } (Form XObject): a non-array object owning a `dict` property plus
+ * an own `bytes` or `imageData` property, or a `contentStream` flag set to
+ * true.
+ *
+ * @param {*} val
+ * @returns {boolean}
+ */
+function isStream(val) {
+  if (val === null || typeof val !== "object" || Array.isArray(val)) {
+    return false;
+  }
+  const owns = key => Object.prototype.hasOwnProperty.call(val, key);
+  if (!owns("dict")) {
+    return false;
+  }
+  return owns("bytes") || owns("imageData") || val.contentStream === true;
+}
+
+/**
+ * Tell whether a value is a stream that owns an `imageData` property.
+ *
+ * @param {*} val
+ * @returns {boolean}
+ */
+function isImageStream(val) {
+  if (!isStream(val)) {
+    return false;
+  }
+  return Object.prototype.hasOwnProperty.call(val, "imageData");
+}
+
+/**
+ * Tell whether a value is a stream whose `contentStream` flag is true, the
+ * shape used for Form XObjects.
+ *
+ * @param {*} val
+ * @returns {boolean}
+ */
+function isFormXObjectStream(val) {
+  if (!isStream(val)) {
+    return false;
+  }
+  return val.contentStream === true;
+}
+
+/**
+ * Render one key/value pair as a "node" tree item: an optional key span and
+ * ": " separator span, followed by the rendered value.
+ * @param {string|null} key Dict key, array index, or null for root.
+ * @param {*} value
+ * @param {PDFDocumentProxy} doc
+ * @returns {HTMLDivElement} The tree item element.
+ */
+function renderNode(key, value, doc) {
+  const itemEl = document.createElement("div");
+  itemEl.className = "node";
+  itemEl.setAttribute("role", "treeitem");
+  itemEl.tabIndex = -1;
+
+  if (key !== null) {
+    const labelParts = [
+      ["key", key],
+      ["separator", ": "],
+    ];
+    for (const [className, text] of labelParts) {
+      const partEl = document.createElement("span");
+      partEl.className = className;
+      partEl.textContent = text;
+      itemEl.append(partEl);
+    }
+  }
+
+  itemEl.append(renderValue(value, doc));
+  return itemEl;
+}
+
+/**
+ * Populate a container element with the direct children of a value.
+ * Used both by renderValue (inside expandables) and renderRef (directly
+ * into the ref's children container, avoiding an extra toggle level).
+ *
+ * - Stream: one node per dict entry, followed by an image preview, a Form
+ *   XObject "content" node (parsed and raw views), or a raw byte dump.
+ * - Array: one node per element, keyed by its index.
+ * - Other object: one node per own enumerable entry.
+ * - Anything else: a single key-less node.
+ *
+ * @param {*} value Data whose children are rendered.
+ * @param {PDFDocumentProxy} doc Passed through to renderNode.
+ * @param {Element} container Receives the child nodes.
+ */
+function buildChildren(value, doc, container) {
+  // Rows built here carry the same class, role and tabIndex as the rows built
+  // by renderNode, so every row of the tree is exposed the same way.
+  const makeLabelledNode = label => {
+    const el = document.createElement("div");
+    el.className = "node";
+    el.setAttribute("role", "treeitem");
+    el.tabIndex = -1;
+    el.append(makeSpan("key", label), makeSpan("separator", ": "));
+    return el;
+  };
+  const makeBytesDump = bytes => {
+    const el = document.createElement("div");
+    el.className = "bytes-content";
+    el.append(formatBytes(bytes));
+    return el;
+  };
+
+  if (isStream(value)) {
+    for (const [k, v] of Object.entries(value.dict)) {
+      container.append(renderNode(k, v, doc));
+    }
+
+    if (isImageStream(value)) {
+      container.append(renderImageData(value.imageData));
+      return;
+    }
+
+    if (isFormXObjectStream(value)) {
+      const contentNode = makeLabelledNode("content");
+
+      const parsedEl = document.createElement("span");
+      parsedEl.className = "content-stream-parsed";
+      parsedEl.append(
+        renderExpandable(
+          `[Content Stream, ${value.instructions.length} instructions]`,
+          "stream-label",
+          c => buildInstructionLines(value, c)
+        )
+      );
+
+      const rawEl = document.createElement("span");
+      rawEl.className = "content-stream-raw";
+      // NOTE(review): do not assume `bytes` accompanies a Form XObject
+      // stream; leave the raw view empty rather than throw when it is
+      // missing. TODO confirm against what getRawData() returns.
+      if (value.bytes) {
+        rawEl.append(
+          makeSpan("stream-label", `<${value.bytes.length} raw bytes>`),
+          makeBytesDump(value.bytes)
+        );
+      }
+
+      contentNode.append(parsedEl, rawEl);
+      container.append(contentNode);
+      return;
+    }
+
+    // Plain stream: a "bytes" row followed by the formatted byte dump.
+    const byteNode = makeLabelledNode("bytes");
+    byteNode.append(
+      makeSpan("stream-label", `<${value.bytes.length} raw bytes>`)
+    );
+    container.append(byteNode, makeBytesDump(value.bytes));
+    return;
+  }
+
+  if (Array.isArray(value)) {
+    value.forEach((v, i) => container.append(renderNode(String(i), v, doc)));
+  } else if (value !== null && typeof value === "object") {
+    for (const [k, v] of Object.entries(value)) {
+      container.append(renderNode(k, v, doc));
+    }
+  } else {
+    container.append(renderNode(null, value, doc));
+  }
+}
+
+/**
+ * Render a single content-stream token as a styled span.
+ *
+ * Scalar tokens become one span whose class depends on the token type.
+ * Arrays and dicts become a span holding bracket spans around their
+ * recursively rendered items, separated by single spaces. A falsy token is
+ * shown as "null"; an unrecognised type shows its value, or its type name
+ * when it has no value.
+ *
+ * @param {Object|null} token Token with a `type` and, depending on the type,
+ *   `value` or `num`/`gen`.
+ * @returns {HTMLSpanElement}
+ */
+function renderToken(token) {
+  if (!token) {
+    return makeSpan("token-null", "null");
+  }
+
+  // Shared shell for the two composite token kinds.
+  const bracketed = (className, opening, closing, fill) => {
+    const shell = document.createElement("span");
+    shell.className = className;
+    shell.append(makeSpan("bracket", opening));
+    fill(shell);
+    shell.append(document.createTextNode(" "));
+    shell.append(makeSpan("bracket", closing));
+    return shell;
+  };
+
+  const { type } = token;
+  if (type === "cmd") {
+    return makeSpan("token-cmd", token.value);
+  }
+  if (type === "name") {
+    return makeSpan("token-name", "/" + token.value);
+  }
+  if (type === "ref") {
+    return makeSpan("token-ref", `${token.num} ${token.gen} R`);
+  }
+  if (type === "number") {
+    return makeSpan("token-num", String(token.value));
+  }
+  if (type === "string") {
+    return makeSpan("token-str", JSON.stringify(token.value));
+  }
+  if (type === "boolean") {
+    return makeSpan("token-bool", String(token.value));
+  }
+  if (type === "null") {
+    return makeSpan("token-null", "null");
+  }
+  if (type === "array") {
+    return bracketed("token-array", "[", "]", shell => {
+      for (const element of token.value) {
+        shell.append(document.createTextNode(" "));
+        shell.append(renderToken(element));
+      }
+    });
+  }
+  if (type === "dict") {
+    return bracketed("token-dict", "<<", ">>", shell => {
+      for (const [entryKey, entryValue] of Object.entries(token.value)) {
+        shell.append(document.createTextNode(" "));
+        shell.append(makeSpan("token-name", "/" + entryKey));
+        shell.append(document.createTextNode(" "));
+        shell.append(renderToken(entryValue));
+      }
+    });
+  }
+  return makeSpan("token-unknown", String(token.value ?? token.type));
+}
+
+/**
+ * Populate container with one .cs-instruction div per instruction.
+ * Shared by Page content streams and Form XObject streams.
+ *
+ * Each line lists the instruction's arguments followed by its operator.
+ * Lines between a section opener (BT, q, BMC, BDC) and its closer
+ * (ET, Q, EMC) are indented by 1.5em per nesting level; a closer is shown at
+ * the level of its opener. The operator span gets the matching entry of
+ * `val.cmdNames`, when there is one, as its tooltip.
+ *
+ * @param {{instructions: Array<{cmd: string|null, args: Array}>,
+ *          cmdNames: Object}} val
+ * @param {Element} container Receives a single "content-stream" div.
+ */
+function buildInstructionLines(val, container) {
+  // BMC and BDC both begin a marked-content sequence that EMC ends, so both
+  // must indent; otherwise the EMC of a BMC section would wrongly un-indent
+  // whatever section encloses it.
+  const openers = new Set(["BT", "q", "BMC", "BDC"]);
+  const closers = new Set(["ET", "Q", "EMC"]);
+
+  const pre = document.createElement("div");
+  pre.className = "content-stream";
+  let depth = 0;
+  for (const instr of val.instructions) {
+    // Clamp at 0 so an unbalanced closer cannot produce negative padding.
+    if (closers.has(instr.cmd)) {
+      depth = Math.max(0, depth - 1);
+    }
+    const line = document.createElement("div");
+    line.className = "cs-instruction";
+    if (depth > 0) {
+      line.style.paddingInlineStart = `${depth * 1.5}em`;
+    }
+    for (const arg of instr.args) {
+      line.append(renderToken(arg));
+      line.append(document.createTextNode(" "));
+    }
+    if (instr.cmd !== null) {
+      const cmdEl = makeSpan("token-cmd", instr.cmd);
+      const opsName = val.cmdNames[instr.cmd];
+      if (opsName) {
+        cmdEl.title = opsName;
+      }
+      line.append(cmdEl);
+    }
+    pre.append(line);
+    if (openers.has(instr.cmd)) {
+      depth++;
+    }
+  }
+  container.append(pre);
+}
+
+/**
+ * Render a Page content stream as two pre-built views toggled by CSS:
+ * - .content-stream-parsed: expandable colorized instruction widget
+ * - .content-stream-raw: the raw contents rendered as an ordinary value (the
+ *   single entry itself when there is exactly one, the whole list otherwise)
+ * The active view is controlled by the "parse-cs-active" class on #tree.
+ *
+ * @param {{instructions: Array, rawContents: Array}} val
+ * @param {PDFDocumentProxy} doc
+ * @returns {DocumentFragment} Fragment holding both views.
+ */
+function renderContentStream(val, doc) {
+  const { instructions, rawContents } = val;
+
+  const parsedView = document.createElement("span");
+  parsedView.className = "content-stream-parsed";
+  const summary = `[Content Stream, ${instructions.length} instructions]`;
+  parsedView.append(
+    renderExpandable(summary, "stream-label", target =>
+      buildInstructionLines(val, target)
+    )
+  );
+
+  const rawView = document.createElement("span");
+  rawView.className = "content-stream-raw";
+  const rawValue = rawContents.length === 1 ? rawContents[0] : rawContents;
+  rawView.append(renderValue(rawValue, doc));
+
+  const fragment = document.createDocumentFragment();
+  fragment.append(parsedView, rawView);
+  return fragment;
+}
+
+/**
+ * Render a value inline (primitive) or as an expandable widget.
+ * Returns a Node or DocumentFragment suitable for appendChild().
+ *
+ * Checked in this order:
+ *   ref string ("10 0 R") and ref object → lazy ref widget,
+ *   PDF Name → /Name, Page content stream → parsed + raw views,
+ *   stream → "[Stream]" expandable, dict/array → "{n}" / "[n]" expandable
+ *   (or a bare "{}" / "[]" when empty), string/number/boolean → literal.
+ * Everything else is shown as "null".
+ *
+ * @param {*} value
+ * @param {PDFDocumentProxy} doc
+ * @returns {Node|DocumentFragment}
+ */
+function renderValue(value, doc) {
+  // Non-object values first: none of the object shapes can match them.
+  switch (typeof value) {
+    case "string":
+      return REF_RE.test(value)
+        ? renderRef(value, doc)
+        : makeSpan("str-value", JSON.stringify(value));
+    case "number":
+      return makeSpan("num-value", String(value));
+    case "boolean":
+      return makeSpan("bool-value", String(value));
+    case "object":
+      break;
+    default:
+      return makeSpan("null-value", "null");
+  }
+  if (value === null) {
+    return makeSpan("null-value", "null");
+  }
+
+  if (isRefObject(value)) {
+    return renderRef(value, doc);
+  }
+  if (isPDFName(value)) {
+    return makeSpan("name-value", "/" + value.name);
+  }
+  if (isContentStream(value)) {
+    return renderContentStream(value, doc);
+  }
+
+  const fillChildren = container => buildChildren(value, doc, container);
+  if (isStream(value)) {
+    return renderExpandable("[Stream]", "stream-label", fillChildren);
+  }
+
+  // Remaining objects are plain dicts or arrays.
+  const isList = Array.isArray(value);
+  const size = isList ? value.length : Object.keys(value).length;
+  if (size === 0) {
+    return makeSpan("bracket", isList ? "[]" : "{}");
+  }
+  const summary = isList ? `[${size}]` : `{${size}}`;
+  return renderExpandable(summary, "bracket", fillChildren);
+}
+
+/**
+ * Build a lazy-loading expand/collapse widget for a ref (string or object).
+ * Results are cached in `refCache` keyed by "num:gen".
+ *
+ * The referenced object is requested with doc.getRawData({ ref }) the first
+ * time the widget is opened. When loading or rendering fails, the error is
+ * shown in place, the failed request is dropped from the cache, and the next
+ * expansion tries again.
+ *
+ * @param {string|{num: number, gen: number}} ref "10 0 R" or { num, gen }.
+ * @param {PDFDocumentProxy} doc
+ * @returns {DocumentFragment} Toggle button, label and children container.
+ */
+function renderRef(ref, doc) {
+  // Derive the cache key and display label from whichever form we received.
+  // String refs look like "10 0 R"; object refs are { num, gen }.
+  let cacheKey, label;
+  if (typeof ref === "string") {
+    const parts = ref.split(" ");
+    cacheKey = `${parts[0]}:${parts[1]}`;
+    label = ref;
+  } else {
+    cacheKey = `${ref.num}:${ref.gen}`;
+    label = refLabel(ref);
+  }
+
+  const frag = document.createDocumentFragment();
+
+  const toggleEl = document.createElement("span");
+  toggleEl.textContent = ARROW_COLLAPSED;
+  toggleEl.setAttribute("role", "button");
+  toggleEl.setAttribute("tabindex", "0");
+  toggleEl.setAttribute("aria-expanded", "false");
+  toggleEl.setAttribute("aria-label", `Expand reference ${label}`);
+
+  const refEl = document.createElement("span");
+  refEl.className = "ref";
+  refEl.textContent = label;
+  refEl.setAttribute("aria-hidden", "true");
+
+  const childrenEl = document.createElement("div");
+  childrenEl.className = "hidden";
+  childrenEl.setAttribute("role", "group");
+  childrenEl.setAttribute("aria-label", `Contents of reference ${label}`);
+
+  let open = false;
+  let loaded = false;
+
+  const onToggle = async () => {
+    open = !open;
+    toggleEl.textContent = open ? ARROW_EXPANDED : ARROW_COLLAPSED;
+    toggleEl.setAttribute("aria-expanded", String(open));
+    childrenEl.classList.toggle("hidden", !open);
+
+    if (!open || loaded) {
+      return;
+    }
+    // Set before awaiting so a quick close/reopen does not start a second
+    // load while the first one is still pending.
+    loaded = true;
+    const spinner = document.createElement("div");
+    spinner.setAttribute("role", "status");
+    spinner.textContent = "Loading…";
+    // Replaces the error message left behind by a previous failed attempt.
+    childrenEl.replaceChildren(spinner);
+
+    let pending;
+    try {
+      if (!refCache.has(cacheKey)) {
+        refCache.set(cacheKey, doc.getRawData({ ref }));
+      }
+      pending = refCache.get(cacheKey);
+      const result = await pending;
+      childrenEl.replaceChildren();
+      buildChildren(result, doc, childrenEl);
+    } catch (err) {
+      // Do not keep a failed request around: drop it (unless the cache has
+      // been refilled with a different entry meanwhile) and allow a retry.
+      if (pending !== undefined && refCache.get(cacheKey) === pending) {
+        refCache.delete(cacheKey);
+      }
+      loaded = false;
+      const errEl = document.createElement("div");
+      errEl.setAttribute("role", "alert");
+      errEl.textContent = "Error: " + err.message;
+      childrenEl.replaceChildren(errEl);
+    }
+  };
+
+  toggleEl.addEventListener("click", onToggle);
+  toggleEl.addEventListener("keydown", e => {
+    if (e.key === "Enter" || e.key === " ") {
+      e.preventDefault();
+      onToggle();
+    }
+  });
+  refEl.addEventListener("click", onToggle);
+
+  frag.append(toggleEl);
+  frag.append(refEl);
+  frag.append(childrenEl);
+  return frag;
+}
+
+/**
+ * Build a synchronous expand/collapse widget: a toggle button, a clickable
+ * label and an initially hidden children container.
+ * @param {string} label Text shown on the collapsed line.
+ * @param {string} labelClass CSS class for the label.
+ * @param {function} buildFn Called with (containerEl) on first open.
+ * @returns {DocumentFragment} Fragment holding the three elements.
+ */
+function renderExpandable(label, labelClass, buildFn) {
+  const toggleEl = document.createElement("span");
+  toggleEl.textContent = ARROW_COLLAPSED;
+  const toggleAttrs = [
+    ["role", "button"],
+    ["tabindex", "0"],
+    ["aria-expanded", "false"],
+    ["aria-label", `Expand ${label}`],
+  ];
+  for (const [attrName, attrValue] of toggleAttrs) {
+    toggleEl.setAttribute(attrName, attrValue);
+  }
+
+  const labelEl = document.createElement("span");
+  labelEl.className = labelClass;
+  labelEl.textContent = label;
+  labelEl.setAttribute("aria-hidden", "true");
+
+  const childrenEl = document.createElement("div");
+  childrenEl.className = "hidden";
+  childrenEl.setAttribute("role", "group");
+  childrenEl.setAttribute("aria-label", `Contents of ${label}`);
+
+  const state = { open: false, built: false };
+
+  // Flip the widget; the children are built once, on the first expansion.
+  const flip = () => {
+    state.open = !state.open;
+    toggleEl.textContent = state.open ? ARROW_EXPANDED : ARROW_COLLAPSED;
+    toggleEl.setAttribute("aria-expanded", String(state.open));
+    childrenEl.classList.toggle("hidden", !state.open);
+    if (!state.open || state.built) {
+      return;
+    }
+    state.built = true;
+    buildFn(childrenEl);
+  };
+
+  // Enter and Space activate the toggle, as they would a native button.
+  const onKeydown = e => {
+    if (e.key !== "Enter" && e.key !== " ") {
+      return;
+    }
+    e.preventDefault();
+    flip();
+  };
+
+  toggleEl.addEventListener("click", () => flip());
+  toggleEl.addEventListener("keydown", onKeydown);
+  labelEl.addEventListener("click", () => flip());
+
+  const fragment = document.createDocumentFragment();
+  fragment.append(toggleEl, labelEl, childrenEl);
+  return fragment;
+}
+
+/**
+ * Build a DocumentFragment for the byte string.
+ * Printable ASCII (0x20–0x7e) runs become plain text nodes.
+ * Consecutive non-printable bytes are grouped into a single element,
+ * with each byte as uppercase XX separated by
+ * a narrow space.
+ */
+/**
+ * Render image data (RGBA Uint8ClampedArray) into a