Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions gulpfile.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ const IMAGE_DECODERS_LEGACY_DIR = BUILD_DIR + "image_decoders-legacy/";
// Build output directories; each one is a sub-directory of BUILD_DIR.
const DEFAULT_PREFERENCES_DIR = BUILD_DIR + "default_preferences/";
const MINIFIED_DIR = BUILD_DIR + "minified/";
const MINIFIED_LEGACY_DIR = BUILD_DIR + "minified-legacy/";
// Output directory of the "internal-viewer" task; its contents are also
// copied into GH_PAGES_DIR + "internal-viewer/" when preparing the web site.
const INTERNAL_VIEWER_DIR = BUILD_DIR + "internal-viewer/";
const JSDOC_BUILD_DIR = BUILD_DIR + "jsdoc/";
const GH_PAGES_DIR = BUILD_DIR + "gh-pages/";
const DIST_DIR = BUILD_DIR + "dist/";
Expand Down Expand Up @@ -2368,6 +2369,52 @@ gulp.task("check_l10n", function (done) {
});
});

/**
 * Create the stream that bundles the internal viewer entry point.
 *
 * The webpack configuration is obtained from `createWebpackConfig`, asking
 * for an output file named `pdf_internal_viewer.mjs` with library type
 * "module".
 *
 * @param {Object} defines - Build defines forwarded to `createWebpackConfig`.
 * @returns {Object} The `./web/pdf_internal_viewer.js` source stream piped
 *   through `webpack2Stream`.
 */
function createInternalViewerBundle(defines) {
  const outputOptions = {
    filename: "pdf_internal_viewer.mjs",
    library: { type: "module" },
  };
  const webpackConfig = createWebpackConfig(defines, outputOptions);
  const entryStream = gulp.src("./web/pdf_internal_viewer.js", {
    encoding: false,
  });
  return entryStream.pipe(webpack2Stream(webpackConfig));
}

/**
 * Build every artifact of the internal viewer into `dir`.
 *
 * The target directory is removed first (recursively, ignoring a missing
 * directory), then the individual build streams are created and combined
 * with `ordered`.
 *
 * @param {Object} defines - Build defines passed to the bundle/preprocess
 *   helpers.
 * @param {string} dir - Output directory; sub-paths are appended to it
 *   directly, so it is expected to end with a path separator.
 * @returns {Object} The result of `ordered([...])` over all build streams.
 */
function buildInternalViewer(defines, dir) {
  fs.rmSync(dir, { recursive: true, force: true });

  const buildDir = dir + "build";
  const webDir = dir + "web";

  // The streams are created in the same order in which they are combined.
  const mainBundle = createMainBundle(defines).pipe(gulp.dest(buildDir));
  const workerBundle = createWorkerBundle(defines).pipe(gulp.dest(buildDir));
  const viewerBundle = createInternalViewerBundle(defines).pipe(
    gulp.dest(webDir)
  );
  const viewerHtml = preprocessHTML(
    "web/pdf_internal_viewer.html",
    defines
  ).pipe(gulp.dest(webDir));
  const viewerCss = preprocessCSS("web/pdf_internal_viewer.css", defines)
    .pipe(
      postcss([
        postcssDirPseudoClass(),
        discardCommentsCSS(),
        postcssNesting(),
        postcssLightDarkFunction({ preserve: true }),
        autoprefixer(AUTOPREFIXER_CONFIG),
      ])
    )
    .pipe(gulp.dest(webDir));
  const wasmFiles = createWasmBundle().pipe(gulp.dest(dir + "web/wasm"));

  return ordered([
    mainBundle,
    workerBundle,
    viewerBundle,
    viewerHtml,
    viewerCss,
    wasmFiles,
  ]);
}

// Task "internal-viewer": runs `createBuildNumber`, then builds the internal
// viewer into INTERNAL_VIEWER_DIR with the GENERIC define enabled.
gulp.task(
  "internal-viewer",
  gulp.series(createBuildNumber, function createInternalViewer() {
    const internalViewerDefines = Object.assign({}, DEFINES, {
      GENERIC: true,
    });
    console.log("\n### Creating internal viewer");
    return buildInternalViewer(internalViewerDefines, INTERNAL_VIEWER_DIR);
  })
);

function ghPagesPrepare() {
console.log("\n### Creating web site");

Expand All @@ -2391,6 +2438,13 @@ function ghPagesPrepare() {
gulp
.src(JSDOC_BUILD_DIR + "**/*", { base: JSDOC_BUILD_DIR, encoding: false })
.pipe(gulp.dest(GH_PAGES_DIR + "api/draft/")),
gulp
.src(INTERNAL_VIEWER_DIR + "**/*", {
base: INTERNAL_VIEWER_DIR,
encoding: false,
removeBOM: false,
})
.pipe(gulp.dest(GH_PAGES_DIR + "internal-viewer/")),
]);
}

Expand Down Expand Up @@ -2442,6 +2496,7 @@ gulp.task(
gulp.series(
"generic",
"generic-legacy",
"internal-viewer",
"jsdoc",
ghPagesPrepare,
"metalsmith"
Expand Down
241 changes: 230 additions & 11 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {
isArrayEqual,
makeArr,
objectSize,
OPS,
PageActionEventType,
RenderingIntentFlag,
shadow,
Expand All @@ -37,6 +38,17 @@ import {
PopupAnnotation,
WidgetAnnotation,
} from "./annotation.js";
import {
Cmd,
Dict,
EOF,
isName,
isRefsEqual,
Name,
Ref,
RefSet,
RefSetCache,
} from "./primitives.js";
import {
collectActions,
getInheritableProperty,
Expand All @@ -51,27 +63,21 @@ import {
XRefEntryException,
XRefParseException,
} from "./core_utils.js";
import {
Dict,
isName,
isRefsEqual,
Name,
Ref,
RefSet,
RefSetCache,
} from "./primitives.js";
import { EvaluatorPreprocessor, PartialEvaluator } from "./evaluator.js";
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
import { Lexer, Linearization, Parser } from "./parser.js";
import { NullStream, Stream } from "./stream.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./calculate_md5.js";
import { Catalog } from "./catalog.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
import { DatasetReader } from "./dataset_reader.js";
import { Intersector } from "./intersector.js";
import { Linearization } from "./parser.js";
import { LocalColorSpaceCache } from "./image_utils.js";
import { ObjectLoader } from "./object_loader.js";
import { OperatorList } from "./operator_list.js";
import { PartialEvaluator } from "./evaluator.js";
import { PDFFunctionFactory } from "./function.js";
import { PDFImage } from "./image.js";
import { StreamsSequenceStream } from "./decode_stream.js";
import { StructTreePage } from "./struct_tree.js";
import { XFAFactory } from "./xfa/factory.js";
Expand Down Expand Up @@ -2030,6 +2036,219 @@ class PDFDocument {
AnnotationFactory.createGlobals(this.pdfManager)
);
}

async toJSObject(value, firstCall = true) {
if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
throw new Error("Not implemented: toJSObject");
}

if (value === null && firstCall) {
return this.toJSObject(this.xref.trailer, false);
}
if (value instanceof Dict) {
const obj = Object.create(null);
const isPage = isName(value.get("Type"), "Page");
for (const [key, val] of value.getRawEntries()) {
obj[key] =
isPage && key === "Contents"
? _getContentTokens(val, this.xref)
: await this.toJSObject(val, false);
}
return obj;
}
if (Array.isArray(value)) {
return Promise.all(value.map(v => this.toJSObject(v, false)));
}
if (value instanceof Ref) {
if (firstCall) {
return this.toJSObject(this.xref.fetch(value), false);
}
const result = Object.create(null);
result.num = value.num;
result.gen = value.gen;
return result;
}
if (value instanceof BaseStream) {
const { dict } = value;
const obj = Object.create(null);
obj.dict = await this.toJSObject(dict, false);

if (
isName(dict.get("Type"), "XObject") &&
isName(dict.get("Subtype"), "Image")
) {
try {
const pdfFunctionFactory = new PDFFunctionFactory({
xref: this.xref,
isEvalSupported: this.pdfManager.evaluatorOptions.isEvalSupported,
});
const imageObj = await PDFImage.buildImage({
xref: this.xref,
res: Dict.empty,
image: value,
pdfFunctionFactory,
globalColorSpaceCache: this.catalog.globalColorSpaceCache,
localColorSpaceCache: new LocalColorSpaceCache(),
});
const imgData = await imageObj.createImageData(
/* forceRGBA = */ true,
/* isOffscreenCanvasSupported = */ false
);
obj.imageData = {
width: imgData.width,
height: imgData.height,
kind: imgData.kind,
data: imgData.data,
};
return obj;
} catch {
// Fall through to regular byte stream if image decoding fails.
}
}

if (isName(dict.get("Subtype"), "Form")) {
obj.bytes = value.getString();
value.reset();
const { instructions, cmdNames } = _groupIntoInstructions(
_tokenizeStream(value, this.xref)
);
obj.contentStream = true;
obj.instructions = instructions;
obj.cmdNames = cmdNames;
return obj;
}

obj.bytes = value.getString();
return obj;
}
return value;
}
}

/**
 * Parse a stream into a flat list of plain-object tokens.
 *
 * Objects are read one at a time with a `Parser` (streams disallowed) and
 * converted with `_tokenToJSObject`; objects that convert to `null` are
 * dropped. Reading stops at `EOF` or at the first parser error, in which
 * case the tokens collected so far are returned.
 *
 * @param {Object} stream - The stream handed to the `Lexer`.
 * @param {Object} xref - The cross-reference table handed to the `Parser`.
 * @returns {Array<Object>} The converted tokens, in stream order.
 */
function _tokenizeStream(stream, xref) {
  const collected = [];
  const lexer = new Lexer(stream);
  const parser = new Parser({ lexer, xref, allowStreams: false });

  for (;;) {
    let parsed;
    try {
      parsed = parser.getObj();
    } catch {
      // Best effort: keep whatever was parsed before the error.
      return collected;
    }
    if (parsed === EOF) {
      return collected;
    }
    const converted = _tokenToJSObject(parsed);
    if (converted !== null) {
      collected.push(converted);
    }
  }
}

/**
 * Tokenize the content stream(s) of a page and group them into instructions.
 *
 * @param {*} contentsVal - The raw `Contents` entry: a single value or an
 *   array of values, each typically a `Ref` to a stream.
 * @param {Object} xref - Cross-reference table used to resolve references
 *   and passed on to `_tokenizeStream`.
 * @returns {Object} `{ contentStream: true, instructions, cmdNames,
 *   rawContents }`, where `rawContents` lists `{ num, gen }` for every entry
 *   that is a `Ref` (even if it does not resolve to a stream) and
 *   `instructions`/`cmdNames` come from `_groupIntoInstructions` applied to
 *   the tokens of all streams, concatenated in order.
 */
function _getContentTokens(contentsVal, xref) {
  const refs = Array.isArray(contentsVal) ? contentsVal : [contentsVal];
  const rawContents = [];
  const tokens = [];
  for (const rawRef of refs) {
    if (rawRef instanceof Ref) {
      rawContents.push({ num: rawRef.num, gen: rawRef.gen });
    }
    const stream = xref.fetchIfRef(rawRef);
    if (!(stream instanceof BaseStream)) {
      // Entries that don't resolve to a stream contribute no tokens.
      continue;
    }
    // Append one token at a time: spreading the whole list into `push`
    // passes every token as a call argument, which throws a `RangeError`
    // for large content streams.
    for (const token of _tokenizeStream(stream, xref)) {
      tokens.push(token);
    }
  }
  const { instructions, cmdNames } = _groupIntoInstructions(tokens);
  return { contentStream: true, instructions, cmdNames, rawContents };
}

// Cache for the reverse OPS lookup (numeric id → property name string);
// stays `null` until `_getOpsIdToName` is called for the first time.
let _opsIdToName = null;

/**
 * Get the reverse lookup of `OPS`, mapping each numeric id to its property
 * name. The map is built on the first call and reused afterwards.
 *
 * @returns {Object} A prototype-less object keyed by OPS id.
 */
function _getOpsIdToName() {
  if (_opsIdToName) {
    return _opsIdToName;
  }
  const reverseMap = Object.create(null);
  for (const name of Object.keys(OPS)) {
    reverseMap[OPS[name]] = name;
  }
  _opsIdToName = reverseMap;
  return reverseMap;
}

/**
 * Group a flat token list into `{ cmd, args }` instructions.
 *
 * Non-command tokens are buffered as pending arguments until a command token
 * is seen:
 *  - unknown commands and commands flagged `variableArgs` take all pending
 *    arguments;
 *  - other commands take at most `numArgs` arguments (the ones closest to
 *    the command); any surplus is emitted first, one instruction per token,
 *    with `cmd: null`.
 * Arguments still pending at the end are emitted with `cmd: null` as well.
 *
 * @param {Array<Object>} tokens - Tokens as produced by `_tokenToJSObject`.
 * @returns {Object} `{ instructions, cmdNames }`, where `cmdNames` maps each
 *   known command string that was encountered to its OPS property name.
 */
function _groupIntoInstructions(tokens) {
  const { opMap } = EvaluatorPreprocessor;
  const opsIdToName = _getOpsIdToName();
  const instructions = [];
  const cmdNames = Object.create(null);
  let pending = [];

  const emitOrphans = orphans => {
    for (const orphan of orphans) {
      instructions.push({ cmd: null, args: [orphan] });
    }
  };

  for (const token of tokens) {
    if (token.type !== "cmd") {
      pending.push(token);
      continue;
    }
    const cmd = token.value;
    const op = opMap[cmd];
    if (op && !(cmd in cmdNames)) {
      cmdNames[cmd] = opsIdToName[op.id];
    }
    if (op && !op.variableArgs) {
      // Fixed arity: everything before the last `numArgs` tokens is orphaned.
      const orphanCount = Math.max(0, pending.length - op.numArgs);
      emitOrphans(pending.splice(0, orphanCount));
    }
    instructions.push({ cmd, args: pending });
    pending = [];
  }
  emitOrphans(pending);

  return { instructions, cmdNames };
}

/**
 * Convert a parsed PDF object into a plain, tagged JavaScript object.
 *
 * Supported inputs and their results:
 *  - `Cmd`     → `{ type: "cmd", value }`
 *  - `Name`    → `{ type: "name", value }`
 *  - `Ref`     → `{ type: "ref", num, gen }`
 *  - `Array`   → `{ type: "array", value }` with every element converted
 *  - `Dict`    → `{ type: "dict", value }` with every raw entry converted
 *  - number / string / boolean → `{ type, value }`
 *  - `null`    → `{ type: "null" }`
 *
 * @param {*} obj - The object to convert.
 * @returns {Object|null} The tagged object, or `null` for any other input.
 */
function _tokenToJSObject(obj) {
  if (obj === null) {
    return { type: "null" };
  }
  const primitiveType = typeof obj;
  switch (primitiveType) {
    case "number":
    case "string":
    case "boolean":
      return { type: primitiveType, value: obj };
  }

  if (obj instanceof Cmd) {
    return { type: "cmd", value: obj.cmd };
  }
  if (obj instanceof Name) {
    return { type: "name", value: obj.name };
  }
  if (obj instanceof Ref) {
    const { num, gen } = obj;
    return { type: "ref", num, gen };
  }
  if (Array.isArray(obj)) {
    return { type: "array", value: obj.map(item => _tokenToJSObject(item)) };
  }
  if (obj instanceof Dict) {
    const converted = Object.create(null);
    for (const [key, rawValue] of obj.getRawEntries()) {
      converted[key] = _tokenToJSObject(rawValue);
    }
    return { type: "dict", value: converted };
  }
  // Unsupported kinds of objects are reported as `null`.
  return null;
}

export { Page, PDFDocument };
16 changes: 16 additions & 0 deletions src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,22 @@ class WorkerMessageHandler {
return pdfManager.fontFallback(data.id, handler);
});

// "GetRawData": convert a PDF object to a plain JS structure through
// `PDFDocument.toJSObject`. The object is selected either by a 1-based page
// number or by a reference (a string, or an object with `num`/`gen`); with
// neither, `null` is passed on.
handler.on("GetRawData", async function ({ ref, page }) {
  if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) {
    throw new Error("Not implemented: GetRawData");
  }

  if (page >= 1) {
    // A valid page number takes precedence over `ref`.
    const pageDictResult = await pdfManager.ensureCatalog("getPageDict", [
      page - 1,
    ]);
    // Second element of the result — presumably the page's reference;
    // TODO confirm against `Catalog.getPageDict`.
    return pdfManager.ensureDoc("toJSObject", [pageDictResult[1]]);
  }

  let target = null;
  if (ref) {
    if (typeof ref === "string") {
      target = Ref.fromString(ref);
    } else {
      target = Ref.get(ref.num, ref.gen);
    }
  }
  return pdfManager.ensureDoc("toJSObject", [target]);
});

// "Cleanup": run a manually triggered cleanup; the message payload is unused.
handler.on("Cleanup", _data =>
  pdfManager.cleanup(/* manuallyTriggered = */ true)
);
Expand Down
Loading