Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 38 additions & 9 deletions apps/studio/src/components/RunEvalModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import { useQueryClient } from '@tanstack/react-query';
import { useCallback, useEffect, useMemo, useState } from 'react';

import { useNavigate } from '@tanstack/react-router';
import {
launchEvalRun,
previewEvalCommand,
Expand All @@ -40,6 +41,7 @@ export interface RunEvalModalProps {

export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalModalProps) {
const queryClient = useQueryClient();
const navigate = useNavigate();

// Form state
const [suiteFilter, setSuiteFilter] = useState(prefill?.suiteFilter ?? '');
Expand Down Expand Up @@ -148,9 +150,18 @@ export function RunEvalModal({ open, onClose, benchmarkId, prefill }: RunEvalMod
// ── Active run view ────────────────────────────────────────────────────

if (activeRunId && runStatus) {
// Closes the modal, then navigates to the index route with `tab=runs`
// in the search params.
const handleRunInBackground = (): void => {
  onClose();
  navigate({ to: '/', search: { tab: 'runs' } as Record<string, string> });
};
return (
<ModalShell onClose={onClose} title="Eval Run">
<RunStatusView status={runStatus} onClose={onClose} />
<RunStatusView
status={runStatus}
onClose={onClose}
onRunInBackground={handleRunInBackground}
runId={activeRunId}
/>
</ModalShell>
);
}
Expand Down Expand Up @@ -392,9 +403,13 @@ function ModalShell({
function RunStatusView({
status,
onClose,
onRunInBackground,
runId,
}: {
status: import('~/lib/types').EvalRunStatus;
onClose: () => void;
onRunInBackground?: () => void;
runId?: string;
}) {
const isTerminal = status.status === 'finished' || status.status === 'failed';

Expand Down Expand Up @@ -437,21 +452,35 @@ function RunStatusView({
</div>
)}

{isTerminal && (
<div className="flex items-center justify-between">
<span className="text-xs text-gray-500">
Exit code: {status.exit_code}
{status.finished_at && ` · ${new Date(status.finished_at).toLocaleTimeString()}`}
</span>
<div className="flex items-center justify-between">
<span className="text-xs text-gray-500">
{isTerminal ? (
<>
Exit code: {status.exit_code}
{status.finished_at && ` · ${new Date(status.finished_at).toLocaleTimeString()}`}
</>
) : (
runId && (
<button
type="button"
onClick={onRunInBackground}
className="text-xs text-gray-400 hover:text-cyan-400"
>
Run in background
</button>
)
)}
</span>
{isTerminal && (
<button
type="button"
onClick={onClose}
className="rounded-md bg-gray-700 px-4 py-2 text-sm text-white hover:bg-gray-600"
>
Close
</button>
</div>
)}
)}
</div>
</div>
);
}
12 changes: 12 additions & 0 deletions apps/studio/src/components/Sidebar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
useBenchmarkRunDetail,
useBenchmarkRunList,
useCategorySuites,
useEvalRuns,
useExperiments,
useRunDetail,
useRunList,
Expand Down Expand Up @@ -166,12 +167,23 @@ function RunSidebar() {
const { data: aggregatedData } = useAllBenchmarkRuns();
const data = useAggregated ? aggregatedData : localData;

const { data: evalRunsData } = useEvalRuns();
const activeRunCount = (evalRunsData?.runs ?? []).filter(
(r) => r.status === 'starting' || r.status === 'running',
).length;

return (
<SidebarShell>
<div className="flex items-center gap-2 border-b border-gray-800 px-4 py-4">
<Link to="/" className="text-lg font-semibold text-white hover:text-cyan-400">
AgentV Studio
</Link>
{activeRunCount > 0 && (
<span className="flex items-center gap-1 rounded-full bg-cyan-900/40 px-2 py-0.5 text-xs text-cyan-400">
<span className="inline-block h-1.5 w-1.5 animate-pulse rounded-full bg-cyan-400" />
{activeRunCount}
</span>
)}
</div>

<nav className="flex-1 overflow-y-auto px-2 py-3">
Expand Down
14 changes: 14 additions & 0 deletions apps/studio/src/lib/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import type {
EvalDetailResponse,
EvalDiscoverResponse,
EvalPreviewResponse,
EvalRunListResponse,
EvalRunResponse,
EvalRunStatus,
EvalTargetsResponse,
Expand Down Expand Up @@ -512,6 +513,19 @@ export function useEvalRunStatus(runId: string | null) {
return useQuery(evalRunStatusOptions(runId));
}

/**
 * Builds the query options used to list eval runs.
 *
 * When `benchmarkId` is truthy the list is requested from
 * `${benchmarkApiBase(benchmarkId)}/eval/runs`; otherwise `/api/eval/runs`
 * is used. The query key is `['eval-runs', benchmarkId ?? '']` and the query
 * is configured with a `refetchInterval` of 3000.
 *
 * @param benchmarkId Optional benchmark whose runs should be listed.
 * @returns The value produced by `queryOptions` for this list request.
 */
export function evalRunsOptions(benchmarkId?: string) {
  let endpoint = '/api/eval/runs';
  if (benchmarkId) {
    endpoint = `${benchmarkApiBase(benchmarkId)}/eval/runs`;
  }

  const loadRuns = () => fetchJson<EvalRunListResponse>(endpoint);

  return queryOptions({
    queryKey: ['eval-runs', benchmarkId ?? ''],
    queryFn: loadRuns,
    refetchInterval: 3_000,
  });
}

/**
 * Hook form of `evalRunsOptions`: passes the built options to `useQuery`.
 *
 * @param benchmarkId Optional benchmark id forwarded to `evalRunsOptions`.
 * @returns The result of `useQuery` for the eval-run list.
 */
export function useEvalRuns(benchmarkId?: string) {
  const options = evalRunsOptions(benchmarkId);
  return useQuery(options);
}

export async function previewEvalCommand(
body: RunEvalRequest,
benchmarkId?: string,
Expand Down
21 changes: 21 additions & 0 deletions apps/studio/src/routeTree.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { Route as SettingsRouteImport } from './routes/settings'
import { Route as IndexRouteImport } from './routes/index'
import { Route as RunsRunIdRouteImport } from './routes/runs/$runId'
import { Route as ProjectsBenchmarkIdRouteImport } from './routes/projects/$benchmarkId'
import { Route as JobsRunIdRouteImport } from './routes/jobs/$runId'
import { Route as ExperimentsExperimentNameRouteImport } from './routes/experiments/$experimentName'
import { Route as EvalsRunIdEvalIdRouteImport } from './routes/evals/$runId.$evalId'
import { Route as RunsRunIdSuiteSuiteRouteImport } from './routes/runs/$runId_.suite.$suite'
Expand Down Expand Up @@ -40,6 +41,11 @@ const ProjectsBenchmarkIdRoute = ProjectsBenchmarkIdRouteImport.update({
path: '/projects/$benchmarkId',
getParentRoute: () => rootRouteImport,
} as any)
// Route entry for `/jobs/$runId`, built from the imported route module and
// parented directly to the root route.
const JobsRunIdRoute = JobsRunIdRouteImport.update({
id: '/jobs/$runId',
path: '/jobs/$runId',
getParentRoute: () => rootRouteImport,
} as any)
const ExperimentsExperimentNameRoute =
ExperimentsExperimentNameRouteImport.update({
id: '/experiments/$experimentName',
Expand Down Expand Up @@ -79,6 +85,7 @@ export interface FileRoutesByFullPath {
'/': typeof IndexRoute
'/settings': typeof SettingsRoute
'/experiments/$experimentName': typeof ExperimentsExperimentNameRoute
'/jobs/$runId': typeof JobsRunIdRoute
'/projects/$benchmarkId': typeof ProjectsBenchmarkIdRoute
'/runs/$runId': typeof RunsRunIdRoute
'/evals/$runId/$evalId': typeof EvalsRunIdEvalIdRoute
Expand All @@ -91,6 +98,7 @@ export interface FileRoutesByTo {
'/': typeof IndexRoute
'/settings': typeof SettingsRoute
'/experiments/$experimentName': typeof ExperimentsExperimentNameRoute
'/jobs/$runId': typeof JobsRunIdRoute
'/projects/$benchmarkId': typeof ProjectsBenchmarkIdRoute
'/runs/$runId': typeof RunsRunIdRoute
'/evals/$runId/$evalId': typeof EvalsRunIdEvalIdRoute
Expand All @@ -104,6 +112,7 @@ export interface FileRoutesById {
'/': typeof IndexRoute
'/settings': typeof SettingsRoute
'/experiments/$experimentName': typeof ExperimentsExperimentNameRoute
'/jobs/$runId': typeof JobsRunIdRoute
'/projects/$benchmarkId': typeof ProjectsBenchmarkIdRoute
'/runs/$runId': typeof RunsRunIdRoute
'/evals/$runId/$evalId': typeof EvalsRunIdEvalIdRoute
Expand All @@ -118,6 +127,7 @@ export interface FileRouteTypes {
| '/'
| '/settings'
| '/experiments/$experimentName'
| '/jobs/$runId'
| '/projects/$benchmarkId'
| '/runs/$runId'
| '/evals/$runId/$evalId'
Expand All @@ -130,6 +140,7 @@ export interface FileRouteTypes {
| '/'
| '/settings'
| '/experiments/$experimentName'
| '/jobs/$runId'
| '/projects/$benchmarkId'
| '/runs/$runId'
| '/evals/$runId/$evalId'
Expand All @@ -142,6 +153,7 @@ export interface FileRouteTypes {
| '/'
| '/settings'
| '/experiments/$experimentName'
| '/jobs/$runId'
| '/projects/$benchmarkId'
| '/runs/$runId'
| '/evals/$runId/$evalId'
Expand All @@ -155,6 +167,7 @@ export interface RootRouteChildren {
IndexRoute: typeof IndexRoute
SettingsRoute: typeof SettingsRoute
ExperimentsExperimentNameRoute: typeof ExperimentsExperimentNameRoute
JobsRunIdRoute: typeof JobsRunIdRoute
ProjectsBenchmarkIdRoute: typeof ProjectsBenchmarkIdRoute
RunsRunIdRoute: typeof RunsRunIdRoute
EvalsRunIdEvalIdRoute: typeof EvalsRunIdEvalIdRoute
Expand Down Expand Up @@ -194,6 +207,13 @@ declare module '@tanstack/react-router' {
preLoaderRoute: typeof ProjectsBenchmarkIdRouteImport
parentRoute: typeof rootRouteImport
}
'/jobs/$runId': {
id: '/jobs/$runId'
path: '/jobs/$runId'
fullPath: '/jobs/$runId'
preLoaderRoute: typeof JobsRunIdRouteImport
parentRoute: typeof rootRouteImport
}
'/experiments/$experimentName': {
id: '/experiments/$experimentName'
path: '/experiments/$experimentName'
Expand Down Expand Up @@ -243,6 +263,7 @@ const rootRouteChildren: RootRouteChildren = {
IndexRoute: IndexRoute,
SettingsRoute: SettingsRoute,
ExperimentsExperimentNameRoute: ExperimentsExperimentNameRoute,
JobsRunIdRoute: JobsRunIdRoute,
ProjectsBenchmarkIdRoute: ProjectsBenchmarkIdRoute,
RunsRunIdRoute: RunsRunIdRoute,
EvalsRunIdEvalIdRoute: EvalsRunIdEvalIdRoute,
Expand Down
43 changes: 42 additions & 1 deletion apps/studio/src/routes/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Uses URL search param `?tab=` for tab persistence.
*/

import { createFileRoute, useNavigate, useRouterState } from '@tanstack/react-router';
import { Link, createFileRoute, useNavigate, useRouterState } from '@tanstack/react-router';
import { useState } from 'react';

import { useQueryClient } from '@tanstack/react-query';
Expand All @@ -23,6 +23,7 @@ import {
syncRemoteResultsApi,
useBenchmarkList,
useCompare,
useEvalRuns,
useRemoteStatus,
useRunList,
useStudioConfig,
Expand Down Expand Up @@ -319,6 +320,7 @@ function RunsTabContent({

return (
<div className="space-y-4">
<ActiveRunsSection />
<RunSourceToolbar
filter={sourceFilter}
onFilterChange={onSourceFilterChange}
Expand Down Expand Up @@ -364,6 +366,45 @@ function RunsTabContent({
);
}

// ── Active runs section ───────────────────────────────────────────────────

/**
 * Renders an "Active" panel listing eval runs whose status is `starting` or
 * `running`, as reported by `useEvalRuns()`.
 *
 * Each row shows the run id, the local time string derived from
 * `started_at`, and a link to `/jobs/$runId` for that run. Renders nothing
 * when no run matches.
 */
function ActiveRunsSection() {
  const { data: evalRuns } = useEvalRuns();
  const inFlight = (evalRuns?.runs ?? []).filter(
    ({ status }) => status === 'running' || status === 'starting',
  );

  // Hide the whole section when there is nothing in progress.
  if (!inFlight.length) {
    return null;
  }

  return (
    <div className="rounded-lg border border-cyan-900/40 bg-cyan-950/10">
      <div className="border-b border-cyan-900/30 px-4 py-2.5">
        <span className="text-xs font-medium uppercase tracking-wider text-cyan-400">Active</span>
      </div>
      <ul className="divide-y divide-gray-800/50">
        {inFlight.map((run) => {
          const startedLabel = new Date(run.started_at).toLocaleTimeString();
          return (
            <li key={run.id} className="flex items-center justify-between px-4 py-3">
              <div className="flex items-center gap-3 min-w-0">
                <span className="inline-block h-2 w-2 flex-shrink-0 animate-pulse rounded-full bg-cyan-400" />
                <span className="truncate font-mono text-sm text-gray-300">{run.id}</span>
                <span className="flex-shrink-0 text-xs text-gray-500">{startedLabel}</span>
              </div>
              <Link
                to="/jobs/$runId"
                params={{ runId: run.id }}
                className="ml-4 flex-shrink-0 text-xs text-cyan-400 hover:text-cyan-300"
              >
                View Log →
              </Link>
            </li>
          );
        })}
      </ul>
    </div>
  );
}

function LoadingSkeleton() {
return (
<div className="space-y-2">
Expand Down
Loading
Loading