Skip to content

Commit 82db2d9

Browse files
committed
sync(bfmono): feat(simulator-ui): overhaul sessions drawer and reset flow (+19 more) (bfmono@149308372)
This PR is an automated gambitmono sync of bfmono Gambit packages.

- Source: `packages/gambit/`
- Core: `packages/gambit-core/`
- bfmono rev: 149308372

Changes:

- 149308372 feat(simulator-ui): overhaul sessions drawer and reset flow
- 66e268db2 fix(gambit): enforce test bot hangup after first reply in simpsons explainers
- fb56a27cc feat(gambit): unify calibrate drawer and toggle ratings
- fe1cf95d6 feat(gambit): add test bot chat flow + simpsons explainer scaffold
- 9b9eb64bf feat(gambit): test-bot init fill
- 91c8ab646 fix(gambit): disable test-bot init inheritance
- 3cb78f0e7 feat(gambit): restore model availability checks
- 2ac9fe805 test(gambit): add opt-in live provider integration tests
- 19f443033 feat(gambit): add deck startMode metadata
- d8f7b0176 fix(gambit): improve simulator auto-bundling
- 45bf70959 fix(gambit): honor openrouter fallback for google-prefixed models
- a5be06d62 refactor(gambit): make gambit check provider-agnostic
- b724212db feat(gambit): add google provider integration
- db4550db6 feat(gambit): add provider router fallback config
- 110433689 chore(gambit): cut 0.8.3 release
- 3cc12a163 fix(gambit): update compile docs include
- d17a4bdc3 chore(gambit): cut 0.8.2 release
- 876085953 feat(gambit): improve init model fallback messaging
- 5bda55166 feat(gambit): add model fallback resolution
- bdc68aae6 feat(gambit): add gambit.toml model aliases

Do not edit this repo directly; make changes in bfmono and re-run the sync.
1 parent cca2261 commit 82db2d9

12 files changed

Lines changed: 1143 additions & 447 deletions

File tree

scaffolds/demo/examples/advanced/simpsons_explainer/test_bots/quantum_entanglement.deck.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ model = "openai/gpt-4o-mini"
77
temperature = 0
88
+++
99

10-
![test_bot_hangup](../cards/test_bot_hangup.card.md)
11-
1210
You are a test user for the Simpsons-metaphor assistant. Ignore any prior
1311
assistant content.
1412

1513
If `initialQuestion` is provided, your first message must be exactly that text.
1614
Otherwise your first message must be exactly: "What is quantum entanglement?"
1715

18-
Do not ask any follow-up questions. After the assistant responds once, respond
19-
with an empty message to hang up and end the test run.
16+
You must send exactly two user messages total in this test run:
17+
18+
1. The first message per the rule above.
19+
2. After the assistant responds once, your second message must be an empty
20+
message to hang up and end the test run. Do not ask any follow-up questions
21+
or send any other text.

scaffolds/demo/examples/advanced/simpsons_explainer_user/test_bots/quantum_entanglement.deck.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ model = "openai/gpt-4o-mini"
77
temperature = 0
88
+++
99

10-
![test_bot_hangup](../cards/test_bot_hangup.card.md)
11-
1210
You are a test user for the Simpsons-metaphor assistant. Ignore any prior
1311
assistant content.
1412

1513
If `initialQuestion` is provided, your first message must be exactly that text.
1614
Otherwise your first message must be exactly: "What is quantum entanglement?"
1715

18-
Do not ask any follow-up questions. After the assistant responds once, respond
19-
with an empty message to hang up and end the test run.
16+
You must send exactly two user messages total in this test run:
17+
18+
1. The first message per the rule above.
19+
2. After the assistant responds once, your second message must be an empty
20+
message to hang up and end the test run. Do not ask any follow-up questions
21+
or send any other text.
Lines changed: 339 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,339 @@
1+
import React, { useMemo, useState } from "react";
2+
import Button from "./gds/Button.tsx";
3+
import Icon from "./gds/Icon.tsx";
4+
import Panel from "./gds/Panel.tsx";
5+
import {
6+
extractGradingFlags,
7+
extractTurnContext,
8+
formatSnippet,
9+
formatTimestampShort,
10+
getScoreClass,
11+
} from "./utils.ts";
12+
import type {
13+
FeedbackEntry,
14+
GradingFlag,
15+
SessionDetailResponse,
16+
} from "./utils.ts";
17+
18+
/**
 * One row of the "Ratings" list: a feedback entry paired with whatever is
 * known about the message it was left on.
 */
type CalibrateDrawerFeedbackItem = {
  /** The rating itself (score, reason, timestamps, target message ref). */
  entry: FeedbackEntry;
  /** The rated message, when it could be resolved; only `content` is read. */
  message?: { content?: string } | null;
  /** Role of the rated message; `"assistant"` gets its own label when rendered. */
  role?: string | null;
};
23+
24+
/**
 * The graded item a grader flag points at, looked up by the flag's ref id.
 */
type CalibrateDrawerRunItem = {
  /** 1-based turn number, present only for per-turn grading results. */
  turnNumber?: number;
  /** Raw grader input for the item; passed to `extractTurnContext` for display. */
  input?: unknown;
};
28+
29+
/**
 * Props for {@link CalibrateDrawer}.
 *
 * Every prop is optional. When `sessionDetail` is supplied, ratings and
 * grading flags are derived from it and `feedbackItems`, `messages`, and
 * `gradingFlags` are not consulted.
 */
type CalibrateDrawerProps = {
  /** Explicit state path to offer for copying; falls back to `sessionDetail.meta`. */
  statePath?: string | null;
  /** Shows the loading placeholder while true. */
  loading?: boolean;
  /** Error text rendered in place of the empty-state placeholder. */
  error?: string | null;
  /** Identifier of the session being shown. */
  sessionId?: string | null;
  /** Live messages; those carrying `feedback` become rating rows. */
  messages?: {
    content?: string;
    role?: string;
    messageRefId?: string;
    feedback?: FeedbackEntry | null;
  }[];
  /** Full session payload; source of truth for ratings and flags when present. */
  sessionDetail?: SessionDetailResponse | null;
  /** Pre-resolved rating rows, used when `sessionDetail` is absent. */
  feedbackItems?: CalibrateDrawerFeedbackItem[];
  /** Grader flags, used when `sessionDetail` is absent. */
  gradingFlags?: GradingFlag[];
  /** Grading run id -> display label; a non-empty map overrides derived labels. */
  runLabelById?: Map<string, string>;
  /** Flag ref id -> graded item; a non-empty map overrides derived items. */
  runItemByRefId?: Map<string, CalibrateDrawerRunItem>;
};
46+
47+
/**
 * Stable empty defaults for the optional map props. Using module-level
 * instances (instead of `new Map()` in the destructuring default) keeps the
 * identity constant across renders so the memos that depend on them are not
 * invalidated on every render. These maps are never mutated.
 */
const EMPTY_RUN_LABELS = new Map<string, string>();
const EMPTY_RUN_ITEMS = new Map<string, CalibrateDrawerRunItem>();

/** How long the "Copied" confirmation stays visible, in milliseconds. */
const COPIED_RESET_MS = 1200;

/** Returns `meta.gradingRuns` when `meta` is an object holding an array there. */
function readGradingRuns(meta: unknown): unknown[] | undefined {
  if (!meta || typeof meta !== "object") return undefined;
  const runs = (meta as { gradingRuns?: unknown }).gradingRuns;
  return Array.isArray(runs) ? runs : undefined;
}

/**
 * Side drawer summarising a session's ratings (message feedback) and grader
 * flags, with an optional "Copy state path" button.
 *
 * Data resolution:
 * - State path: `statePath` prop, else `sessionDetail.meta.sessionStatePath`.
 * - Ratings: derived from `sessionDetail` when present, else `feedbackItems`,
 *   else collected from `messages` that carry `feedback`. Derived lists are
 *   ordered newest first by `createdAt`.
 * - Flags: `extractGradingFlags(sessionDetail.meta)` when `sessionDetail` is
 *   present, else the `gradingFlags` prop.
 * - Run labels / run items: the corresponding prop when non-empty, else
 *   derived from `sessionDetail.meta.gradingRuns`.
 */
export default function CalibrateDrawer(props: CalibrateDrawerProps) {
  // `sessionId` is accepted by the props type but is not needed for rendering.
  const {
    statePath,
    loading = false,
    error = null,
    messages,
    sessionDetail,
    feedbackItems,
    gradingFlags,
    runLabelById = EMPTY_RUN_LABELS,
    runItemByRefId = EMPTY_RUN_ITEMS,
  } = props;

  const [copiedStatePath, setCopiedStatePath] = useState(false);
  // Handle of the pending "reset Copied label" timer, if any.
  const copiedResetTimer = React.useRef<number | null>(null);

  // Cancel a pending reset when the drawer unmounts.
  React.useEffect(() => {
    return () => {
      if (copiedResetTimer.current !== null) {
        window.clearTimeout(copiedResetTimer.current);
        copiedResetTimer.current = null;
      }
    };
  }, []);

  const resolvedStatePath = useMemo(() => {
    if (statePath) return statePath;
    const meta = sessionDetail?.meta;
    if (meta && typeof meta === "object") {
      const sessionStatePath = (meta as { sessionStatePath?: unknown })
        .sessionStatePath;
      if (typeof sessionStatePath === "string") {
        return sessionStatePath;
      }
    }
    return null;
  }, [sessionDetail?.meta, statePath]);

  const resolvedFeedbackItems = useMemo(() => {
    if (sessionDetail) {
      const feedback = sessionDetail.feedback ?? [];
      const refs = sessionDetail.messageRefs ?? [];
      const detailMessages = sessionDetail.messages ?? [];
      const messageByRefId = new Map<
        string,
        { content?: string; role?: string }
      >();
      const roleByRefId = new Map<string, string>();
      // Refs and messages are matched positionally: ref i describes message i.
      refs.forEach((ref, index) => {
        if (!ref?.id) return;
        const message = detailMessages[index];
        if (message) {
          messageByRefId.set(
            ref.id,
            message as { content?: string; role?: string },
          );
        }
        if (ref.role) roleByRefId.set(ref.id, ref.role);
      });
      const items = feedback.map((entry) => {
        const message = messageByRefId.get(entry.messageRefId);
        const role = message?.role ?? roleByRefId.get(entry.messageRefId);
        return { entry, message, role };
      });
      // Newest first; entries without a timestamp sort last.
      return items.sort((a, b) => {
        const aKey = a.entry.createdAt ?? "";
        const bKey = b.entry.createdAt ?? "";
        return bKey.localeCompare(aKey);
      });
    }
    if (feedbackItems !== undefined) return feedbackItems;
    if (!messages?.length) return [];
    const items: Array<
      CalibrateDrawerFeedbackItem & { _sortIndex?: number }
    > = [];
    messages.forEach((message, index) => {
      if (!message?.feedback) return;
      items.push({
        entry: message.feedback,
        message: { content: message.content },
        role: message.role,
        _sortIndex: index,
      });
    });
    // Newest first by timestamp; when timestamps are missing or equal, the
    // later message in the transcript wins.
    return items.sort((a, b) => {
      const aKey = a.entry.createdAt ?? "";
      const bKey = b.entry.createdAt ?? "";
      if (aKey && bKey && aKey !== bKey) {
        return bKey.localeCompare(aKey);
      }
      return (b._sortIndex ?? 0) - (a._sortIndex ?? 0);
    });
  }, [feedbackItems, messages, sessionDetail]);

  const resolvedGradingFlags = useMemo(() => {
    if (sessionDetail) {
      return extractGradingFlags(sessionDetail.meta);
    }
    return gradingFlags ?? [];
  }, [gradingFlags, sessionDetail]);

  const resolvedRunLabelById = useMemo(() => {
    if (runLabelById.size > 0) return runLabelById;
    const runs = readGradingRuns(sessionDetail?.meta);
    if (!runs) return runLabelById;
    const map = new Map<string, string>();
    runs.forEach((run) => {
      if (!run || typeof run !== "object") return;
      const record = run as {
        id?: unknown;
        graderLabel?: unknown;
        graderId?: unknown;
      };
      if (typeof record.id !== "string") return;
      // Prefer the human label, then the grader id, then the run id itself.
      const label = typeof record.graderLabel === "string"
        ? record.graderLabel
        : typeof record.graderId === "string"
        ? record.graderId
        : record.id;
      map.set(record.id, label);
    });
    return map;
  }, [runLabelById, sessionDetail]);

  const resolvedRunItemByRefId = useMemo(() => {
    if (runItemByRefId.size > 0) return runItemByRefId;
    const runs = readGradingRuns(sessionDetail?.meta);
    if (!runs) return runItemByRefId;
    const map = new Map<string, CalibrateDrawerRunItem>();
    runs.forEach((run) => {
      if (!run || typeof run !== "object") return;
      const record = run as {
        id?: unknown;
        result?: unknown;
        input?: unknown;
      };
      if (typeof record.id !== "string") return;
      const result = record.result;
      if (
        result &&
        typeof result === "object" &&
        (result as { mode?: unknown }).mode === "turns" &&
        Array.isArray((result as { turns?: unknown }).turns)
      ) {
        // Per-turn results: one entry per turn, keyed by run id and turn index.
        const turns = (result as { turns?: unknown })
          .turns as Array<{ index?: number; input?: unknown }>;
        turns.forEach((turn, idx) => {
          const index = typeof turn.index === "number" ? turn.index : idx;
          map.set(`gradingRun:${record.id}#turn:${index}`, {
            turnNumber: index + 1,
            input: turn.input,
          });
        });
      } else {
        // Whole-run results: a single entry keyed by run id.
        map.set(`gradingRun:${record.id}`, {
          input: record.input,
        });
      }
    });
    return map;
  }, [runItemByRefId, sessionDetail]);

  const handleCopyStatePath = useMemo(() => {
    if (!resolvedStatePath) return null;
    return () => {
      const clipboard = navigator.clipboard;
      if (!clipboard) {
        // The Clipboard API is unavailable here; do not claim success.
        console.warn("Clipboard API unavailable; state path was not copied");
        return;
      }
      clipboard.writeText(resolvedStatePath).then(
        () => {
          setCopiedStatePath(true);
          // Restart the reset timer so a repeated click gets the full delay.
          if (copiedResetTimer.current !== null) {
            window.clearTimeout(copiedResetTimer.current);
          }
          copiedResetTimer.current = window.setTimeout(() => {
            copiedResetTimer.current = null;
            setCopiedStatePath(false);
          }, COPIED_RESET_MS);
        },
        (err: unknown) => {
          console.warn("Failed to copy state path to clipboard", err);
        },
      );
    };
  }, [resolvedStatePath]);

  return (
    <Panel as="aside" className="calibrate-drawer">
      <div className="drawer-section">
        <strong>Calibrate</strong>
        {handleCopyStatePath && (
          <>
            <Button variant="secondary" onClick={handleCopyStatePath}>
              <Icon
                name={copiedStatePath ? "copied" : "copy"}
                size={14}
              />
              {copiedStatePath ? "Copied" : "Copy state path"}
            </Button>
            <p className="calibrate-button-meta">
              Paste this in your coding assistant to debug the agent.
            </p>
          </>
        )}
        <h3>Ratings & flags</h3>
        {loading && (
          <div className="placeholder">Loading ratings and flags…</div>
        )}
        {error && <div className="error">{error}</div>}
        {!loading &&
          !error &&
          resolvedFeedbackItems.length === 0 &&
          resolvedGradingFlags.length === 0 && (
          <div className="placeholder">
            No ratings or flags yet.
          </div>
        )}
        {resolvedFeedbackItems.length > 0 && (
          <div className="calibrate-summary-list">
            {resolvedFeedbackItems.map(({ entry, message, role }) => {
              const roleLabel = role === "assistant"
                ? "Assistant message"
                : "Test bot message";
              const displayScore = entry.score;
              // Positive scores get an explicit "+" sign.
              const scoreLabel = displayScore > 0
                ? `+${displayScore}`
                : displayScore;
              const scoreClass = getScoreClass(displayScore);
              return (
                <div
                  key={`${entry.id}-${entry.messageRefId}`}
                  className="calibrate-summary-card"
                >
                  <div
                    className="calibrate-summary-title"
                    title={entry.createdAt
                      ? formatTimestampShort(entry.createdAt)
                      : undefined}
                  >
                    {roleLabel}
                  </div>
                  <div className="calibrate-summary-score-row">
                    <div
                      className={`calibrate-score-badge calibrate-score-badge--small ${scoreClass}`}
                    >
                      {scoreLabel}
                    </div>
                    {entry.reason && (
                      <div className="calibrate-summary-reason ellipsis">
                        {entry.reason}
                      </div>
                    )}
                  </div>
                  {message?.content && (
                    <div className="calibrate-summary-meta ellipsis">
                      {formatSnippet(message.content)}
                    </div>
                  )}
                </div>
              );
            })}
          </div>
        )}
        {resolvedGradingFlags.length > 0 && (
          <div className="calibrate-summary-list">
            {resolvedGradingFlags.map((flag) => {
              const runLabel = flag.runId
                ? resolvedRunLabelById.get(flag.runId)
                : undefined;
              const flaggedItem = resolvedRunItemByRefId.get(flag.refId);
              const turnLabel = flaggedItem?.turnNumber
                ? `Assistant turn ${flaggedItem.turnNumber}`
                : undefined;
              const gradedAssistant = extractTurnContext(
                flaggedItem?.input,
              ).gradedAssistant;
              return (
                <div
                  key={flag.id}
                  className="calibrate-summary-card calibrate-flag-card"
                >
                  <div className="calibrate-summary-title">Grader flag</div>
                  {(runLabel || turnLabel)
                    ? (
                      <div
                        className="calibrate-summary-subtitle"
                        title={flag.createdAt
                          ? formatTimestampShort(flag.createdAt)
                          : undefined}
                      >
                        {runLabel}
                        {runLabel && turnLabel && " • "}
                        {turnLabel}
                      </div>
                    )
                    : "Flagged grader"}
                  <div className="calibrate-summary-score-row">
                    <div className="calibrate-score-badge calibrate-score-badge--small">
                      <Icon name="flag" size={10} />
                    </div>
                    {flag.reason && (
                      <div className="calibrate-summary-reason ellipsis">
                        {flag.reason}
                      </div>
                    )}
                  </div>
                  {gradedAssistant && (
                    <div className="calibrate-summary-meta ellipsis">
                      {formatSnippet(gradedAssistant)}
                    </div>
                  )}
                </div>
              );
            })}
          </div>
        )}
      </div>
    </Panel>
  );
}

0 commit comments

Comments
 (0)