Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions src/supervisor/agents/commandcode/commandcode.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ import type { ProjectLocation, ThreadConfig } from "@/shared/contracts";
import { createCommandCodeAdapter } from ".";
import { buildCommandCodeArgs } from "./argv";
import {
buildCommandCodeModelPickerCapabilities,
COMMANDCODE_DEFAULT_MODEL_ID,
commandCodeDetectionSpec,
defaultCommandCodeCapabilities,
parseCommandCodeModels,
} from "./detection";
import { authJsonHasApiKey, detectCommandCodeInvalidSessionRef } from "./session";
import { detectCommandCodeTerminalStatus } from "./terminal";
Expand Down Expand Up @@ -239,6 +241,115 @@ describe("commandCodeDetectionSpec auth", () => {
});
});

// A faithful slice of real `command-code --list-models` stdout: the
// `Available models` header, three vendor sections (Open Source, Anthropic,
// OpenAI — including a namespace we don't curate: nvidia), the `(default)` /
// `(recommended)` markers, and the trailing usage/docs footer the parser must
// ignore.
// NOTE: the header's "30 models" count presumably describes the full CLI
// listing; only seven model rows are kept in this slice, so tests must not
// rely on that number matching the parsed length.
const LIST_MODELS_FIXTURE = `Available models · 30 models

Open Source

deepseek/deepseek-v4-pro hybrid-attention long-context reasoning
deepseek/deepseek-v4-flash fast hybrid-attention reasoning (default)
moonshotai/Kimi-K2.7-Code improved long-horizon coding with vision
nvidia/nemotron-3-ultra-550b-a55b open reasoning model for long-horizon autonomous agents

Anthropic

claude-sonnet-4-6 best combo of speed & intelligence (recommended)
claude-fable-5 most capable for demanding reasoning & long-horizon agents

OpenAI

gpt-5.5 latest frontier model for general complex work

Pass the full id, or just the short name after the last "/":
cmd --model moonshotai/Kimi-K2.5
cmd --model kimi-k2.5

Docs: https://commandcode.ai/docs/reference/cli/models
`;

// Exercises the `--list-models` stdout parser against the shared fixture and
// against input that contains no model rows at all.
describe("parseCommandCodeModels", () => {
  it("extracts ids + taglines and skips headers, footer and the docs line", () => {
    const models = parseCommandCodeModels(LIST_MODELS_FIXTURE);
    const ids = models.map(({ id }) => id);

    // Fixture order is preserved; section headers and footer lines are absent.
    expect(ids).toEqual([
      "deepseek/deepseek-v4-pro",
      "deepseek/deepseek-v4-flash",
      "moonshotai/Kimi-K2.7-Code",
      "nvidia/nemotron-3-ultra-550b-a55b",
      "claude-sonnet-4-6",
      "claude-fable-5",
      "gpt-5.5",
    ]);

    // The `Docs: https://…` footer is spaced like a model row (2-space gap),
    // so only the "ids never contain a colon" guard keeps it out of the list.
    const hasDocsRow = ids.some((id) => id.startsWith("Docs"));
    expect(hasDocsRow).toBe(false);
  });

  it("flags the (default) model and strips the marker from its tagline", () => {
    const models = parseCommandCodeModels(LIST_MODELS_FIXTURE);

    // The row carrying `(default)` is flagged and loses the marker text.
    const defaultModel = models.find(({ isDefault }) => isDefault);
    expect(defaultModel?.id).toBe("deepseek/deepseek-v4-flash");
    expect(defaultModel?.description).toBe("fast hybrid-attention reasoning");

    // The `(recommended)` marker is stripped from the tagline as well.
    const sonnet = models.find(({ id }) => id === "claude-sonnet-4-6");
    expect(sonnet?.description).toBe("best combo of speed & intelligence");
  });

  it("returns an empty list for unparseable output", () => {
    const unparseableOutputs = ["", "totally unrelated text\nno models here"];
    for (const stdout of unparseableOutputs) {
      expect(parseCommandCodeModels(stdout)).toEqual([]);
    }
  });
});

// Verifies how parsed CLI models are turned into model-picker capabilities:
// ordering, labels, descriptions and the sub-provider grouping.
describe("buildCommandCodeModelPickerCapabilities", () => {
  it("labels models, hoists the default first, and groups by sub-provider", () => {
    const parsedModels = parseCommandCodeModels(LIST_MODELS_FIXTURE);
    const caps = buildCommandCodeModelPickerCapabilities(parsedModels);

    // The CLI default leads the list so a fresh thread mirrors the CLI default.
    const firstModel = caps.models[0];
    expect(firstModel?.id).toBe("deepseek/deepseek-v4-flash");

    const modelEntries = caps.models.map((model) => [model.id, model] as const);
    const modelsById = new Map(modelEntries);
    const kimi = modelsById.get("moonshotai/Kimi-K2.7-Code");

    // A curated label override takes precedence; humanizing is the fallback.
    expect(kimi?.label).toBe("Kimi K2.7 Code");
    expect(modelsById.get("gpt-5.5")?.label).toBe("GPT-5.5");
    // The CLI tagline is carried over as the (search/tooltip) description.
    expect(kimi?.description).toBe("improved long-horizon coding with vision");

    // Un-namespaced ids are mapped explicitly; slash-namespaced ids take their
    // sub-provider from the prefix.
    const expectedSubProviders = [
      ["claude-fable-5", "anthropic"],
      ["gpt-5.5", "openai"],
      ["nvidia/nemotron-3-ultra-550b-a55b", "nvidia"],
    ] as const;
    for (const [modelId, subProviderId] of expectedSubProviders) {
      expect(caps.modelSubProvider?.[modelId]).toBe(subProviderId);
    }

    // Known sub-providers get a curated label; nvidia gets a humanized one.
    const subProviderEntries = (caps.subProviders ?? []).map(
      (subProvider) => [subProvider.id, subProvider.label] as const,
    );
    const subProviderLabels = new Map(subProviderEntries);
    expect(subProviderLabels.get("deepseek")).toBe("DeepSeek");
    expect(subProviderLabels.get("nvidia")).toBe("NVIDIA");
  });

  it("falls back to a humanized label for an uncurated id", () => {
    const uncuratedId = "acme/new-shiny-model";
    const caps = buildCommandCodeModelPickerCapabilities([{ id: "acme/new-shiny-model" }]);

    const onlyModel = caps.models[0];
    expect(onlyModel?.label).toBe("New Shiny Model");
    expect(caps.modelSubProvider?.[uncuratedId]).toBe("acme");

    const acme = (caps.subProviders ?? []).find(({ id }) => id === "acme");
    expect(acme?.label).toBe("Acme");
  });
});

// Covers the capabilities probe when no executable path is available.
describe("commandCodeDetectionSpec capabilitiesProbe", () => {
  // The title states what is asserted below: the probe resolves to `undefined`
  // when called without an executable path.
  it("returns undefined when the binary is absent", async () => {
    // Guard against a vacuous pass: the optional call below would also yield
    // `undefined` if the spec stopped exposing a probe at all.
    expect(typeof commandCodeDetectionSpec.capabilitiesProbe).toBe("function");

    // Called as a method on the spec so any `this` binding is preserved.
    const result = await commandCodeDetectionSpec.capabilitiesProbe?.({
      location: { kind: "posix", path: "/tmp" },
      executablePath: undefined,
    });
    expect(result).toBeUndefined();
  });
});

describe("detectCommandCodeInvalidSessionRef", () => {
it("detects empty-continue messages", () => {
expect(detectCommandCodeInvalidSessionRef("No previous conversation found")).toBe(true);
Expand Down
Loading