Skip to content

Commit 03ea408

Browse files
committed
sync(bfmono): feat(gambit-simulator): add previous test run selectors for test and grade (+19 more) (bfmono@e61fcaa4e)
This PR is an automated gambitmono sync of bfmono Gambit packages.

- Source: `packages/gambit/`
- Core: `packages/gambit/packages/gambit-core/`
- bfmono rev: e61fcaa4e

Changes:

- e61fcaa4e feat(gambit-simulator): add previous test run selectors for test and grade
- 4f0a61e0c fix(gambit): keep blank test runs and allow assistant starts in a workspace
- 5663a6c3e fix(gambit-simulator): prevent overlapping test status polls
- c7a262c7d fix(gambit): enable unstable worker options in gambit-core CI tests
- 5660b3269 test(gambit): harden codex cli test command config
- a9d5438f9 chore(gambit): cut 0.8.5-rc.1 release
- d6a149451 test(gambit): harden codex smoke + build stop tests
- 6c76075fa fix(gambit-serve): root workspace state in invocation directory
- de75a3987 feat(gambit): ship faq openresponses interoperability updates
- 11e0477a3 fix(gambit-sim): default test init when workspace has no saved messages
- 14df328c7 test(gambit-simulator): cover test tab JSON input gating and payload wiring
- 2e833821c feat(gambit-simulator): switch test tab schema inputs to JSON-only
- 126087519 fix(simulator-ui): keep activity toggle visible and prevent collapse jump
- 0c0366932 feat(simulator-ui): add test chat actions activity card parity
- 72bb726e5 fix(gambit): guard artifact restore typing and stabilize reset-abort test
- b65a0567e fix(gambit): harden CI import-map prep for bfmono and mirror layouts
- 4a30aca91 fix(gambit): align standalone CI config deps and typing
- 0e7914222 fix(providers): propagate abort signal in codex responses and add conformance coverage
- b504cb290 fix(gambit): correct relative deck paths after core relocation
- 5d9de8c94 fix(gambit): align verification artifacts after core relocation

Do not edit this repo directly; make changes in bfmono and re-run the sync.
1 parent 18e3779 commit 03ea408

8 files changed

Lines changed: 1402 additions & 107 deletions

File tree

simulator-ui/src/GradePage.tsx

Lines changed: 258 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import {
2525
getScoreClass,
2626
GRADE_STREAM_ID,
2727
isTurnsResult,
28+
scenarioNameFromValue,
2829
setDurableStreamOffset,
2930
} from "./utils.ts";
3031
import type {
@@ -41,6 +42,74 @@ import PageShell from "./gds/PageShell.tsx";
4142
import Panel from "./gds/Panel.tsx";
4243
import { useWorkspaceGrade, useWorkspaceRouting } from "./WorkspaceContext.tsx";
4344

45+
/**
 * Summary of a single scenario (test) run.
 *
 * Only `scenarioRunId` is required; every other field is optional and is left
 * `undefined` when the source value is missing or has the wrong type.
 */
type ScenarioRunSummary = {
  /** Identifier of the scenario run. */
  scenarioRunId: string;
  /** Finite event sequence number associated with the run, when available. */
  lastEventSeq?: number;
  /** Timestamp string for the run's last update, when available. */
  updatedAt?: string;
  /** Identifier of the scenario deck selected for the run, when available. */
  selectedScenarioDeckId?: string;
  /** Display label of the scenario deck selected for the run, when available. */
  selectedScenarioDeckLabel?: string;
  /** Path of the scenario configuration used for the run, when available. */
  scenarioConfigPath?: string;
};
53+
54+
/**
 * Validates an untyped value as a scenario run summary.
 *
 * @param value - Arbitrary data, e.g. an entry read from untyped metadata.
 * @returns A summary containing only well-typed fields, or `null` when `value`
 *   is not an object or lacks a non-blank string `scenarioRunId`.
 */
const parseScenarioRunSummary = (value: unknown): ScenarioRunSummary | null => {
  if (!value || typeof value !== "object") return null;
  const summary = value as Record<string, unknown>;

  // Keeps a field only when it is a string; any other type becomes undefined.
  const optionalString = (field: unknown): string | undefined =>
    typeof field === "string" ? field : undefined;

  const scenarioRunId = optionalString(summary.scenarioRunId);
  // Blank ids are rejected here as well so this parser agrees with
  // scenarioRunIdFromCalibrationRun, which never yields a blank id; a
  // whitespace-only id could otherwise never match any grading run.
  if (scenarioRunId === undefined || scenarioRunId.trim().length === 0) {
    return null;
  }

  const { lastEventSeq } = summary;
  return {
    scenarioRunId,
    // NaN and +/-Infinity are dropped so numeric comparisons stay meaningful.
    lastEventSeq: typeof lastEventSeq === "number" &&
        Number.isFinite(lastEventSeq)
      ? lastEventSeq
      : undefined,
    updatedAt: optionalString(summary.updatedAt),
    selectedScenarioDeckId: optionalString(summary.selectedScenarioDeckId),
    selectedScenarioDeckLabel: optionalString(
      summary.selectedScenarioDeckLabel,
    ),
    scenarioConfigPath: optionalString(summary.scenarioConfigPath),
  };
};
82+
83+
/**
 * Picks a display title for a scenario run summary.
 *
 * Candidates are tried in order and the first usable one wins:
 * 1. the deck label, when it is a non-blank string (returned as stored);
 * 2. the deck id, passed through `scenarioNameFromValue` and falling back to
 *    the raw id when that yields nothing;
 * 3. the config path, via `scenarioNameFromValue` and then `botFilename`;
 * 4. the scenario run id.
 *
 * @param summary - The scenario run summary to title.
 * @returns The chosen title.
 */
const getScenarioTitle = (summary: ScenarioRunSummary): string => {
  // Runtime typeof check is kept so data that slipped past the static types
  // cannot throw on `.trim()`.
  const isNonBlank = (text: unknown): text is string =>
    typeof text === "string" && text.trim().length > 0;

  const { selectedScenarioDeckLabel, selectedScenarioDeckId } = summary;
  if (isNonBlank(selectedScenarioDeckLabel)) return selectedScenarioDeckLabel;
  if (isNonBlank(selectedScenarioDeckId)) {
    return scenarioNameFromValue(selectedScenarioDeckId) ??
      selectedScenarioDeckId;
  }

  // Path-based lookups only run when neither deck field produced a title.
  const configPath = summary.scenarioConfigPath ?? null;
  return scenarioNameFromValue(configPath) ??
    botFilename(configPath) ??
    summary.scenarioRunId;
};
97+
98+
/**
 * Reads the scenario run id stored at `run.input.session.meta.scenarioRunId`.
 *
 * Each level of the path is checked at runtime because the input is untyped.
 *
 * @param run - The calibration run to inspect.
 * @returns The id when every level of the path is an object and the id is a
 *   non-blank string; otherwise `null`.
 */
const scenarioRunIdFromCalibrationRun = (
  run: CalibrationRun,
): string | null => {
  // Narrows an unknown value to a key/value record, or null when it is not a
  // non-null object.
  const asRecord = (candidate: unknown): Record<string, unknown> | null =>
    candidate && typeof candidate === "object"
      ? candidate as Record<string, unknown>
      : null;

  const input = asRecord(run.input);
  const session = asRecord(input?.session);
  const meta = asRecord(session?.meta);
  const scenarioRunId = meta?.scenarioRunId;
  return typeof scenarioRunId === "string" && scenarioRunId.trim().length > 0
    ? scenarioRunId
    : null;
};
112+
44113
function GradePage(
45114
{
46115
setNavActions,
@@ -79,6 +148,7 @@ function GradePage(
79148
updateFlagReason: updateGradeFlagReason,
80149
} = workspaceGrade;
81150
const workspaceRouting = useWorkspaceRouting();
151+
const routedTestRunId = workspaceRouting.testRunId;
82152
const initialCalibrateSessionRef = useRef<string | null>(
83153
getGradeWorkspaceIdFromLocation(),
84154
);
@@ -88,6 +158,9 @@ function GradePage(
88158
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(
89159
initialCalibrateSessionRef.current ?? activeWorkspaceId ?? null,
90160
);
161+
const [selectedTestRunId, setSelectedTestRunId] = useState<string | null>(
162+
null,
163+
);
91164
const [selectedGraderId, setSelectedGraderId] = useState<string | null>(null);
92165
useEffect(() => {
93166
setSelectedSessionId((prev) => {
@@ -161,12 +234,96 @@ function GradePage(
161234
() => graders.find((grader) => grader.id === selectedGraderId) ?? null,
162235
[graders, selectedGraderId],
163236
);
237+
const testRunOptions = useMemo(() => {
238+
const meta = sessionDetail?.meta && typeof sessionDetail.meta === "object"
239+
? sessionDetail.meta as Record<string, unknown>
240+
: {};
241+
const fromList = Array.isArray(meta.scenarioRunSummaries)
242+
? meta.scenarioRunSummaries.map((entry) => parseScenarioRunSummary(entry))
243+
: [];
244+
const fromCurrent = parseScenarioRunSummary(meta.scenarioRunSummary);
245+
const all = [...fromList, fromCurrent].filter(
246+
(entry): entry is ScenarioRunSummary => Boolean(entry),
247+
);
248+
const deduped = new Map<string, ScenarioRunSummary>();
249+
all.forEach((entry) => {
250+
const existing = deduped.get(entry.scenarioRunId);
251+
if (!existing) {
252+
deduped.set(entry.scenarioRunId, entry);
253+
return;
254+
}
255+
const existingSeq = existing.lastEventSeq ?? -1;
256+
const nextSeq = entry.lastEventSeq ?? -1;
257+
if (nextSeq > existingSeq) {
258+
deduped.set(entry.scenarioRunId, entry);
259+
return;
260+
}
261+
if (nextSeq === existingSeq) {
262+
const existingStamp = existing.updatedAt ?? "";
263+
const nextStamp = entry.updatedAt ?? "";
264+
if (nextStamp.localeCompare(existingStamp) > 0) {
265+
deduped.set(entry.scenarioRunId, entry);
266+
}
267+
}
268+
});
269+
return [...deduped.values()].sort((a, b) => {
270+
const aTime = Date.parse(a.updatedAt ?? "");
271+
const bTime = Date.parse(b.updatedAt ?? "");
272+
const aValidTime = Number.isFinite(aTime) ? aTime : -1;
273+
const bValidTime = Number.isFinite(bTime) ? bTime : -1;
274+
if (aValidTime !== bValidTime) return bValidTime - aValidTime;
275+
const aSeq = a.lastEventSeq ?? -1;
276+
const bSeq = b.lastEventSeq ?? -1;
277+
if (aSeq !== bSeq) return bSeq - aSeq;
278+
return b.scenarioRunId.localeCompare(a.scenarioRunId);
279+
});
280+
}, [sessionDetail?.meta]);
281+
282+
useEffect(() => {
283+
const hasOption = (runId: string | null | undefined): runId is string =>
284+
Boolean(
285+
runId &&
286+
testRunOptions.some((entry) => entry.scenarioRunId === runId),
287+
);
288+
const meta = sessionDetail?.meta && typeof sessionDetail.meta === "object"
289+
? sessionDetail.meta as Record<string, unknown>
290+
: {};
291+
const currentScenarioRunId = typeof meta.scenarioRunId === "string" &&
292+
meta.scenarioRunId.trim().length > 0
293+
? meta.scenarioRunId
294+
: null;
295+
const nextRunId = hasOption(routedTestRunId)
296+
? routedTestRunId
297+
: hasOption(selectedTestRunId)
298+
? selectedTestRunId
299+
: hasOption(currentScenarioRunId)
300+
? currentScenarioRunId
301+
: testRunOptions[0]?.scenarioRunId ?? null;
302+
if (selectedTestRunId !== nextRunId) {
303+
setSelectedTestRunId(nextRunId);
304+
}
305+
if (routedTestRunId !== nextRunId) {
306+
workspaceRouting.setTestRunId(nextRunId);
307+
}
308+
}, [
309+
routedTestRunId,
310+
selectedTestRunId,
311+
sessionDetail?.meta,
312+
testRunOptions,
313+
workspaceRouting,
314+
]);
164315
const sessionRuns = useMemo(() => {
165316
if (!selectedSession?.gradingRuns) return [];
166317
return [...selectedSession.gradingRuns].reverse();
167318
}, [selectedSession]);
319+
const filteredSessionRuns = useMemo(() => {
320+
if (!selectedTestRunId) return sessionRuns;
321+
return sessionRuns.filter((run) =>
322+
scenarioRunIdFromCalibrationRun(run) === selectedTestRunId
323+
);
324+
}, [selectedTestRunId, sessionRuns]);
168325
const runSections = useMemo(() => {
169-
return sessionRuns.map((run) => {
326+
return filteredSessionRuns.map((run) => {
170327
const items: Array<{
171328
key: string;
172329
label: string;
@@ -269,7 +426,7 @@ function GradePage(
269426
items,
270427
};
271428
});
272-
}, [sessionRuns]);
429+
}, [filteredSessionRuns]);
273430
const runItems = useMemo(
274431
() => runSections.flatMap((section) => section.items),
275432
[runSections],
@@ -289,6 +446,7 @@ function GradePage(
289446
return new Set(gradingFlags.map((flag) => flag.refId));
290447
}, [gradingFlags]);
291448
const [expandedRunId, setExpandedRunId] = useState<string | null>(null);
449+
const [optimisticRunId, setOptimisticRunId] = useState<string | null>(null);
292450
const [expandedResults, setExpandedResults] = useState<
293451
Record<string, boolean>
294452
>({});
@@ -305,13 +463,37 @@ function GradePage(
305463
setExpandedRunId(routeGradeRunId);
306464
workspaceRouting.setGradeRunId(routeGradeRunId);
307465
}, [routeGradeRunId, workspaceRouting]);
466+
useEffect(() => {
467+
if (!optimisticRunId) return;
468+
if (!sessionRuns.some((run) => run.id === optimisticRunId)) return;
469+
setOptimisticRunId(null);
470+
}, [optimisticRunId, sessionRuns]);
471+
useEffect(() => {
472+
if (!routeGradeRunId || !selectedTestRunId) return;
473+
const routeRun = sessionRuns.find((run) => run.id === routeGradeRunId);
474+
if (!routeRun) return;
475+
if (scenarioRunIdFromCalibrationRun(routeRun) === selectedTestRunId) return;
476+
setExpandedRunId(null);
477+
setRouteGradeRunId(null);
478+
setOptimisticRunId(null);
479+
workspaceRouting.setGradeRunId(null);
480+
updateCalibratePath(selectedSessionId, { gradeRunId: null });
481+
}, [
482+
routeGradeRunId,
483+
selectedSessionId,
484+
selectedTestRunId,
485+
sessionRuns,
486+
updateCalibratePath,
487+
workspaceRouting,
488+
]);
308489
const routeRunNotFound = useMemo(
309490
() =>
310491
Boolean(
311492
routeGradeRunId &&
312-
!runSections.some((section) => section.run.id === routeGradeRunId),
493+
routeGradeRunId !== optimisticRunId &&
494+
!sessionRuns.some((run) => run.id === routeGradeRunId),
313495
),
314-
[routeGradeRunId, runSections],
496+
[optimisticRunId, routeGradeRunId, sessionRuns],
315497
);
316498

317499
useEffect(() => {
@@ -416,16 +598,48 @@ function GradePage(
416598
const data = await runGrade({
417599
workspaceId: selectedSessionId,
418600
graderId: selectedGraderId,
601+
scenarioRunId: selectedTestRunId ?? undefined,
419602
});
603+
const returnedRun = data.run;
420604
const runs = Array.isArray(data.session?.gradingRuns)
421605
? data.session!.gradingRuns
422606
: [];
423-
const latestRun = runs.length > 0 ? runs[runs.length - 1] : null;
607+
const latestRun = (() => {
608+
if (
609+
returnedRun?.id &&
610+
(
611+
!selectedTestRunId ||
612+
scenarioRunIdFromCalibrationRun(returnedRun) === selectedTestRunId
613+
)
614+
) {
615+
return returnedRun;
616+
}
617+
if (!runs.length) return null;
618+
if (!selectedTestRunId) {
619+
return returnedRun ?? runs[runs.length - 1] ?? null;
620+
}
621+
for (let i = runs.length - 1; i >= 0; i -= 1) {
622+
const candidate = runs[i];
623+
if (
624+
scenarioRunIdFromCalibrationRun(candidate) === selectedTestRunId
625+
) {
626+
return candidate;
627+
}
628+
}
629+
return null;
630+
})();
424631
if (latestRun?.id) {
425632
setExpandedRunId(latestRun.id);
426633
setRouteGradeRunId(latestRun.id);
634+
setOptimisticRunId(latestRun.id);
427635
workspaceRouting.setGradeRunId(latestRun.id);
428636
updateCalibratePath(selectedSessionId, { gradeRunId: latestRun.id });
637+
} else {
638+
setExpandedRunId(null);
639+
setRouteGradeRunId(null);
640+
setOptimisticRunId(null);
641+
workspaceRouting.setGradeRunId(null);
642+
updateCalibratePath(selectedSessionId, { gradeRunId: null });
429643
}
430644
} catch (err) {
431645
console.error(err);
@@ -434,12 +648,30 @@ function GradePage(
434648
runGrade,
435649
selectedGraderId,
436650
selectedSessionId,
651+
selectedTestRunId,
437652
updateCalibratePath,
438653
workspaceRouting,
439654
]);
440655

441656
const canRun = Boolean(selectedSessionId && selectedGraderId && !running);
442657

658+
const handleTestRunSelection = useCallback((nextRunId: string) => {
659+
if (!nextRunId) return;
660+
if (nextRunId === selectedTestRunId) return;
661+
setExpandedRunId(null);
662+
setRouteGradeRunId(null);
663+
setOptimisticRunId(null);
664+
workspaceRouting.setGradeRunId(null);
665+
workspaceRouting.setTestRunId(nextRunId);
666+
setSelectedTestRunId(nextRunId);
667+
updateCalibratePath(selectedSessionId, { gradeRunId: null });
668+
}, [
669+
selectedSessionId,
670+
selectedTestRunId,
671+
updateCalibratePath,
672+
workspaceRouting,
673+
]);
674+
443675
useEffect(() => {
444676
if (!setNavActions) return;
445677
setNavActions(null);
@@ -457,6 +689,24 @@ function GradePage(
457689
gap: 10,
458690
}}
459691
>
692+
{testRunOptions.length > 0 && (
693+
<Listbox
694+
label="Previous test run"
695+
value={selectedTestRunId ?? ""}
696+
onChange={handleTestRunSelection}
697+
options={testRunOptions.map((entry) => ({
698+
value: entry.scenarioRunId,
699+
label: getScenarioTitle(entry),
700+
meta: [
701+
entry.updatedAt
702+
? formatTimestampShort(entry.updatedAt)
703+
: null,
704+
entry.scenarioRunId,
705+
].filter(Boolean).join(" · "),
706+
}))}
707+
placeholder="Select previous run"
708+
/>
709+
)}
460710
<div className="flex-row gap-8 items-center">
461711
<div className="flex-1">
462712
<strong>Run a grader</strong>
@@ -525,7 +775,9 @@ function GradePage(
525775
</div>
526776
{runItems.length === 0 && (
527777
<div className="placeholder">
528-
No grader runs for this session yet.
778+
{selectedTestRunId
779+
? "No grader runs for this selected test run yet."
780+
: "No grader runs for this session yet."}
529781
</div>
530782
)}
531783
{routeRunNotFound && selectedSessionId && (

0 commit comments

Comments
 (0)