From fe01e0aa061d12a654efd6a5d9ff4ede8c995e01 Mon Sep 17 00:00:00 2001 From: Valentino Zegna Date: Fri, 8 May 2026 09:45:02 -0700 Subject: [PATCH] fix: validate /analyze body via AnalyzePdfInputSchema (closes #42) The HTTP /analyze endpoint hand-rolled a truthy check on pdf_source and queries. That check missed non-string truthy values (numbers, objects, String wrappers), so malformed bodies reached analyzePdf and crashed inside validateLocalPath with "Cannot read properties of undefined (reading 'trim')". Meanwhile the MCP path was already strict because the SDK validates against zod before invoking the tool handler. This fixes the actual root cause: route the REST path through the same schema that the MCP path uses, hoist the field defs into AnalyzePdfInputShape so MCP and HTTP share one source of truth, and return zod's path-aware error details instead of a generic 400. No defensive guards added inside pdf-utils.ts, since callers can no longer reach those helpers with non-string values. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/server.ts | 14 ++------ src/transports/http.test.ts | 69 +++++++++++++++++++++++++++++++++++++ src/transports/http.ts | 30 +++++++++++----- src/types.ts | 11 +++--- 4 files changed, 100 insertions(+), 24 deletions(-) diff --git a/src/server.ts b/src/server.ts index 78ff429..11876d1 100644 --- a/src/server.ts +++ b/src/server.ts @@ -7,9 +7,9 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; -import { z } from "zod"; import { VERSION } from "./version.js"; import { analyzePdf } from "./service.js"; +import { AnalyzePdfInputShape } from "./types.js"; import { resolveActiveProvider } from "./providers/registry.js"; // Cloud-only modules loaded lazily to avoid pulling in heavy deps in stdio mode. // import { startHttpServer } from "./transports/http.js"; @@ -138,17 +138,7 @@ export const createServer = (mode: "stdio" | "http" = "stdio"): McpServer => { "analyze_pdf", { description: mode === "http" ? ANALYZE_PDF_DESCRIPTION_HTTP : ANALYZE_PDF_DESCRIPTION_STDIO, - inputSchema: { - pdf_source: z - .union([z.string(), z.array(z.string().min(1)).min(1)]) - .describe( - "PDF source: absolute local file path, URL, cached file URI from a previous response (Google only), or array of cached file URIs from a previous chunked response (Google only)" - ), - queries: z - .array(z.string().min(1)) - .min(1) - .describe("Array of questions to ask about the PDF"), - }, + inputSchema: AnalyzePdfInputShape, }, async ({ pdf_source, queries }) => { try { diff --git a/src/transports/http.test.ts b/src/transports/http.test.ts index 2625c43..192afec 100644 --- a/src/transports/http.test.ts +++ b/src/transports/http.test.ts @@ -104,4 +104,73 @@ describe("HTTP transport", () => { const res = await fetch(`${baseUrl}/mcp`, { method: "DELETE" }); expect(res.status).not.toBe(404); }); + + // Regression for #42: malformed /analyze bodies were reaching analyzePdf and + // crashing inside validateLocalPath with "Cannot read properties of undefined + // (reading 'trim')". The handler now runs zod up front so callers get a + // descriptive 400 instead. + describe("POST /analyze input validation", () => { + async function postAnalyze(baseUrl: string, body: unknown): Promise { + return fetch(`${baseUrl}/analyze`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + } + + it("rejects missing pdf_source with 400 and a zod path", async () => { + const { baseUrl, server } = await startTestServer(); + testServer = server; + + const res = await postAnalyze(baseUrl, { queries: ["What is this?"] }); + expect(res.status).toBe(400); + const body = (await res.json()) as { error: string; details: string[] }; + expect(body.error).toBe("Invalid request body"); + expect(body.details.join("\n")).toContain("pdf_source"); + }); + + it("rejects non-string, non-array pdf_source with 400", async () => { + const { baseUrl, server } = await startTestServer(); + testServer = server; + + const res = await postAnalyze(baseUrl, { pdf_source: 123, queries: ["q"] }); + expect(res.status).toBe(400); + const body = (await res.json()) as { error: string; details: string[] }; + expect(body.details.join("\n")).toContain("pdf_source"); + }); + + it("rejects empty queries array with 400", async () => { + const { baseUrl, server } = await startTestServer(); + testServer = server; + + const res = await postAnalyze(baseUrl, { pdf_source: "/tmp/x.pdf", queries: [] }); + expect(res.status).toBe(400); + const body = (await res.json()) as { error: string; details: string[] }; + expect(body.details.join("\n")).toContain("queries"); + }); + + it("rejects empty-string queries with 400", async () => { + const { baseUrl, server } = await startTestServer(); + testServer = server; + + const res = await postAnalyze(baseUrl, { pdf_source: "/tmp/x.pdf", queries: [""] }); + expect(res.status).toBe(400); + const body = (await res.json()) as { error: string; details: string[] }; + expect(body.details.join("\n")).toContain("queries"); + }); + + it("rejects non-JSON body with 400", async () => { + const { baseUrl, server } = await startTestServer(); + testServer = server; + + const res = await fetch(`${baseUrl}/analyze`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: "not json", + }); + expect(res.status).toBe(400); + const body = (await res.json()) as { error: string }; + expect(body.error).toContain("JSON"); + }); + }); }); diff --git a/src/transports/http.ts b/src/transports/http.ts index bc69cbf..32684a3 100644 --- a/src/transports/http.ts +++ b/src/transports/http.ts @@ -12,6 +12,7 @@ import type { IncomingMessage, ServerResponse } from "node:http"; import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; import { analyzePdf } from "../service.js"; +import { AnalyzePdfInputSchema } from "../types.js"; import { resolveActiveProvider } from "../providers/registry.js"; /** @@ -33,16 +34,29 @@ function readBody(req: IncomingMessage): Promise { * Response body: AnalyzePdfResponse JSON */ async function handleAnalyze(req: IncomingMessage, res: ServerResponse): Promise { + let body: unknown; + try { + body = JSON.parse(await readBody(req)); + } catch { + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Request body must be valid JSON" })); + return; + } + + const parsed = AnalyzePdfInputSchema.safeParse(body); + if (!parsed.success) { + const issues = parsed.error.issues.map((issue) => { + const pathStr = issue.path.length > 0 ? issue.path.join(".") : "(root)"; + return `${pathStr}: ${issue.message}`; + }); + res.writeHead(400, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: "Invalid request body", details: issues })); + return; + } + try { - const body = JSON.parse(await readBody(req)); - const { pdf_source, queries } = body; - if (!pdf_source || !queries || !Array.isArray(queries) || queries.length === 0) { - res.writeHead(400, { "Content-Type": "application/json" }); - res.end(JSON.stringify({ error: "Required: pdf_source (string) and queries (string[])" })); - return; - } const { provider, apiKey, modelId } = await resolveActiveProvider(); - const result = await analyzePdf(provider, apiKey, modelId, { pdf_source, queries }); + const result = await analyzePdf(provider, apiKey, modelId, parsed.data); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(result)); } catch (error) { diff --git a/src/types.ts b/src/types.ts index 16749b3..f48a6a8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,14 +1,17 @@ import { z } from "zod"; -/** Schema for the analyze_pdf tool input */ -export const AnalyzePdfInputSchema = z.object({ +/** Field schemas for the analyze_pdf tool input. Shared between MCP and HTTP. */ +export const AnalyzePdfInputShape = { pdf_source: z .union([z.string(), z.array(z.string().min(1)).min(1)]) .describe( - "PDF source: file path, URL, single Gemini file URI, or array of Gemini file URIs from a previous chunked response" + "PDF source: absolute local file path, URL, cached file URI from a previous response (Google only), or array of cached file URIs from a previous chunked response (Google only)" ), queries: z.array(z.string().min(1)).min(1).describe("Array of questions to ask about the PDF"), -}); +}; + +/** Schema for the analyze_pdf tool input */ +export const AnalyzePdfInputSchema = z.object(AnalyzePdfInputShape); /** Input type for the analyze_pdf tool */ export type AnalyzePdfInput = z.infer;