Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 182 additions & 1 deletion src/__tests__/integration/api/workspaces/evals/evals.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, test, expect, beforeEach, vi } from "vitest";
import { GET as getEvalSets, POST as createEvalSet } from "@/app/api/workspaces/[slug]/evals/route";
import { POST as createRequirement } from "@/app/api/workspaces/[slug]/evals/[evalSetId]/requirements/route";
import { GET as getRequirements, POST as createRequirement } from "@/app/api/workspaces/[slug]/evals/[evalSetId]/requirements/route";
import { POST as linkRuns } from "@/app/api/workspaces/[slug]/evals/[evalSetId]/requirements/[reqId]/runs/route";
import { GET as getSessions } from "@/app/api/workspaces/[slug]/evals/sessions/route";
import {
Expand Down Expand Up @@ -360,6 +360,187 @@ describe("Evals API — Integration Tests", () => {
});
});

// ---------------------------------------------------------------------------
// GET /api/workspaces/[slug]/evals/[evalSetId]/requirements
// ---------------------------------------------------------------------------
describe("GET /api/workspaces/[slug]/evals/[evalSetId]/requirements", () => {
  /**
   * Seeds the fixture shared by the success and upstream-failure tests:
   * a user, a workspace owned by that user, an OWNER membership for the
   * user, and a swarm with an API key attached to the workspace.
   */
  const seedOwnedWorkspaceWithSwarm = async () => {
    const owner = await createTestUser();
    const workspace = await createTestWorkspace({ ownerId: owner.id });
    await createTestMembership({ workspaceId: workspace.id, userId: owner.id, role: "OWNER" });
    await createTestSwarm({ workspaceId: workspace.id, swarmApiKey: "test-key" });
    return { owner, workspace };
  };

  describe("Success", () => {
    test("returns requirements with order merged from edges", async () => {
      const { owner, workspace } = await seedOwnedWorkspaceWithSwarm();

      // Upstream payload: two requirement nodes whose `order` lives on the edges.
      const upstreamPayload = {
        nodes: [
          { ref_id: "req-1", node_type: "EvalRequirement", properties: { name: "Req A" } },
          { ref_id: "req-2", node_type: "EvalRequirement", properties: { name: "Req B" } },
        ],
        edges: [
          { target_ref_id: "req-1", properties: { order: 0 } },
          { target_ref_id: "req-2", properties: { order: 1 } },
        ],
      };
      global.fetch = vi.fn().mockResolvedValueOnce({
        ok: true,
        json: async () => upstreamPayload,
      } as any);

      const response = await getRequirements(
        createAuthenticatedGetRequest(
          `http://localhost:3000/api/workspaces/${workspace.slug}/evals/eval-set-1/requirements`,
          owner,
        ),
        { params: Promise.resolve({ slug: workspace.slug, evalSetId: "eval-set-1" }) },
      );

      expect(response.status).toBe(200);
      const body = await response.json();
      expect(body.success).toBe(true);

      // Each returned node must carry the order taken from its matching edge.
      const { nodes, total } = body.data;
      expect(nodes).toHaveLength(2);
      expect(total).toBe(2);
      expect(nodes[0].properties.order).toBe(0);
      expect(nodes[1].properties.order).toBe(1);
    });

    test("returns empty array when eval set has no requirements", async () => {
      const { owner, workspace } = await seedOwnedWorkspaceWithSwarm();

      // Upstream answers successfully but with nothing attached to the eval set.
      global.fetch = vi.fn().mockResolvedValueOnce({
        ok: true,
        json: async () => ({ nodes: [], edges: [] }),
      } as any);

      const response = await getRequirements(
        createAuthenticatedGetRequest(
          `http://localhost:3000/api/workspaces/${workspace.slug}/evals/eval-set-empty/requirements`,
          owner,
        ),
        { params: Promise.resolve({ slug: workspace.slug, evalSetId: "eval-set-empty" }) },
      );

      expect(response.status).toBe(200);
      const body = await response.json();
      expect(body.success).toBe(true);
      expect(body.data.nodes).toEqual([]);
      expect(body.data.total).toBe(0);
    });

    test("calls Jarvis with correct URL including Python list literal params", async () => {
      const { owner, workspace } = await seedOwnedWorkspaceWithSwarm();

      // Keep a handle on the mock so the outgoing URL can be inspected afterwards.
      const fetchMock = vi.fn().mockResolvedValueOnce({
        ok: true,
        json: async () => ({ nodes: [], edges: [] }),
      } as any);
      global.fetch = fetchMock;

      await getRequirements(
        createAuthenticatedGetRequest(
          `http://localhost:3000/api/workspaces/${workspace.slug}/evals/my-eval-set/requirements`,
          owner,
        ),
        { params: Promise.resolve({ slug: workspace.slug, evalSetId: "my-eval-set" }) },
      );

      // First positional argument of the first fetch call is the requested URL.
      const calledUrl: string = fetchMock.mock.calls[0][0];
      const expectedFragments = [
        "/v2/nodes/my-eval-set",
        "expand=edges",
        encodeURIComponent("['HAS_REQUIREMENT']"),
        encodeURIComponent("['EvalRequirement']"),
        "depth=1",
      ];
      for (const fragment of expectedFragments) {
        expect(calledUrl).toContain(fragment);
      }
    });
  });

  describe("Auth failures", () => {
    test("rejects unauthenticated requests", async () => {
      const owner = await createTestUser();
      const workspace = await createTestWorkspace({ ownerId: owner.id });

      // Plain (non-authenticated) request helper: no user is attached.
      const response = await getRequirements(
        createGetRequest(
          `http://localhost:3000/api/workspaces/${workspace.slug}/evals/eval-set-1/requirements`,
        ),
        { params: Promise.resolve({ slug: workspace.slug, evalSetId: "eval-set-1" }) },
      );

      await expectUnauthorized(response);
    });

    test("rejects non-member", async () => {
      const owner = await createTestUser();
      const outsider = await createTestUser();
      const workspace = await createTestWorkspace({ ownerId: owner.id });
      await createTestSwarm({ workspaceId: workspace.id, swarmApiKey: "test-key" });

      // The request is authenticated, but as a user with no membership row.
      const response = await getRequirements(
        createAuthenticatedGetRequest(
          `http://localhost:3000/api/workspaces/${workspace.slug}/evals/eval-set-1/requirements`,
          outsider,
        ),
        { params: Promise.resolve({ slug: workspace.slug, evalSetId: "eval-set-1" }) },
      );

      await expectForbidden(response, "Access denied");
    });
  });

  describe("Swarm not configured", () => {
    test("returns 400 when workspace has no swarm", async () => {
      // Same fixture as the success path, minus the swarm.
      const owner = await createTestUser();
      const workspace = await createTestWorkspace({ ownerId: owner.id });
      await createTestMembership({ workspaceId: workspace.id, userId: owner.id, role: "OWNER" });

      const response = await getRequirements(
        createAuthenticatedGetRequest(
          `http://localhost:3000/api/workspaces/${workspace.slug}/evals/eval-set-1/requirements`,
          owner,
        ),
        { params: Promise.resolve({ slug: workspace.slug, evalSetId: "eval-set-1" }) },
      );

      await expectError(response, "Swarm not configured", 400);
    });
  });

  describe("Upstream failure", () => {
    test("returns 502 when Jarvis returns non-ok", async () => {
      const { owner, workspace } = await seedOwnedWorkspaceWithSwarm();

      // Upstream responds with a non-ok 503; the route is expected to answer 502.
      global.fetch = vi.fn().mockResolvedValueOnce({
        ok: false,
        status: 503,
        text: async () => "Service Unavailable",
      } as any);

      const response = await getRequirements(
        createAuthenticatedGetRequest(
          `http://localhost:3000/api/workspaces/${workspace.slug}/evals/eval-set-1/requirements`,
          owner,
        ),
        { params: Promise.resolve({ slug: workspace.slug, evalSetId: "eval-set-1" }) },
      );

      expect(response.status).toBe(502);
    });
  });
});

// ---------------------------------------------------------------------------
// POST /api/workspaces/[slug]/evals/[evalSetId]/requirements
// ---------------------------------------------------------------------------
Expand Down
103 changes: 103 additions & 0 deletions src/__tests__/unit/api/mock/evals-requirements.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import { describe, test, expect } from "vitest";
import { GET, POST } from "@/app/api/mock/evals/[evalSetId]/requirements/route";
import { NextRequest } from "next/server";

/**
 * Builds a GET `NextRequest` addressed to the mock requirements endpoint.
 *
 * @param evalSetId - Eval set identifier interpolated into the URL path.
 * @returns A GET request for `/api/mock/evals/<evalSetId>/requirements`.
 */
function makeGetRequest(evalSetId: string): NextRequest {
  const url = `http://localhost:3000/api/mock/evals/${evalSetId}/requirements`;
  return new NextRequest(url, { method: "GET" });
}

/**
 * Builds a JSON POST `NextRequest` addressed to the mock requirements endpoint.
 *
 * @param evalSetId - Eval set identifier interpolated into the URL path.
 * @param body - Payload that is serialized with `JSON.stringify` as the request body.
 * @returns A POST request with a `Content-Type: application/json` header.
 */
function makePostRequest(evalSetId: string, body: object): NextRequest {
  const url = `http://localhost:3000/api/mock/evals/${evalSetId}/requirements`;
  const init = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  };
  return new NextRequest(url, init);
}

describe("GET /api/mock/evals/[evalSetId]/requirements", () => {
  /**
   * Calls the mock GET handler for one eval set, supplying the id both in
   * the request URL and in the promise-wrapped route params.
   */
  const fetchRequirements = (evalSetId: string) =>
    GET(makeGetRequest(evalSetId), { params: Promise.resolve({ evalSetId }) });

  test("returns seeded requirements for eval-set-1", async () => {
    const response = await fetchRequirements("eval-set-1");
    const payload = await response.json();

    expect(payload.success).toBe(true);
    const { nodes, total } = payload.data;
    expect(nodes.length).toBeGreaterThan(0);
    expect(total).toBe(nodes.length);

    // Every node must be an EvalRequirement with the full set of expected fields.
    for (const requirement of nodes) {
      expect(requirement.node_type).toBe("EvalRequirement");
      expect(requirement.ref_id).toBeDefined();

      const props = requirement.properties;
      expect(props.name).toBeDefined();
      expect(props.prompt_snippet).toBeDefined();
      expect(Array.isArray(props.positive_cases)).toBe(true);
      expect(Array.isArray(props.negative_cases)).toBe(true);
      expect(typeof props.order).toBe("number");
    }
  });

  test("returns seeded requirements for eval-set-2", async () => {
    const response = await fetchRequirements("eval-set-2");
    const payload = await response.json();

    expect(payload.success).toBe(true);
    expect(payload.data.nodes.length).toBeGreaterThan(0);
  });

  test("eval-set-1 and eval-set-2 return different requirements", async () => {
    const firstResponse = await fetchRequirements("eval-set-1");
    const secondResponse = await fetchRequirements("eval-set-2");

    const firstPayload = await firstResponse.json();
    const secondPayload = await secondResponse.json();

    const firstIds = firstPayload.data.nodes.map((n: { ref_id: string }) => n.ref_id);
    const secondIds = new Set<string>(
      secondPayload.data.nodes.map((n: { ref_id: string }) => n.ref_id),
    );

    // The two eval sets must not share any requirement ref_id.
    const shared = firstIds.filter((id: string) => secondIds.has(id));
    expect(shared).toHaveLength(0);
  });

  test("returns empty array for unknown eval set id", async () => {
    const response = await fetchRequirements("unknown-eval-set-xyz");
    const payload = await response.json();

    expect(payload.success).toBe(true);
    expect(payload.data.nodes).toEqual([]);
    expect(payload.data.total).toBe(0);
  });

  test("returns 200 status", async () => {
    const response = await fetchRequirements("eval-set-1");
    expect(response.status).toBe(200);
  });
});

describe("POST /api/mock/evals/[evalSetId]/requirements", () => {
  test("creates a new requirement node and returns its ref_id", async () => {
    // Requirement payload sent as the JSON body of the POST request.
    const newRequirement = {
      name: "Test Req",
      description: "A description",
      prompt_snippet: "When asked to do X",
      positive_cases: ["Does X correctly"],
      negative_cases: ["Fails silently"],
    };

    // The handler is invoked with the request only, exactly as the route is exercised here.
    const response = await POST(makePostRequest("eval-set-1", newRequirement));
    const payload = await response.json();

    expect(payload.success).toBe(true);

    // The created node is identified by a non-empty string ref_id.
    const createdRefId = payload.data.ref_id;
    expect(typeof createdRefId).toBe("string");
    expect(createdRefId.length).toBeGreaterThan(0);
  });
});
Loading
Loading