diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap index a97bbeb1d..f1c6b5cd2 100644 --- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap +++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap @@ -357,7 +357,7 @@ exports[`Assets Directory Snapshots > CDK assets > cdk/cdk/package.json should m "typescript": "~5.9.3" }, "dependencies": { - "@aws/agentcore-cdk": "^0.1.0-alpha.19", + "@aws/agentcore-cdk": "^0.1.0-alpha.28", "aws-cdk-lib": "^2.248.0", "constructs": "^10.0.0" } diff --git a/src/assets/cdk/package.json b/src/assets/cdk/package.json index aa58892c2..073aaa294 100644 --- a/src/assets/cdk/package.json +++ b/src/assets/cdk/package.json @@ -23,7 +23,7 @@ "typescript": "~5.9.3" }, "dependencies": { - "@aws/agentcore-cdk": "^0.1.0-alpha.19", + "@aws/agentcore-cdk": "^0.1.0-alpha.28", "aws-cdk-lib": "^2.248.0", "constructs": "^10.0.0" } diff --git a/src/cli/primitives/OnlineEvalConfigPrimitive.ts b/src/cli/primitives/OnlineEvalConfigPrimitive.ts index 6e4436ac9..477de5b45 100644 --- a/src/cli/primitives/OnlineEvalConfigPrimitive.ts +++ b/src/cli/primitives/OnlineEvalConfigPrimitive.ts @@ -1,5 +1,5 @@ import { findConfigRoot } from '../../lib'; -import type { OnlineEvalConfig } from '../../schema'; +import type { OnlineEvalConfig, OnlineEvalFilter } from '../../schema'; import { OnlineEvalConfigSchema } from '../../schema'; import { getErrorMessage } from '../errors'; import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types'; @@ -16,6 +16,8 @@ export interface AddOnlineEvalConfigOptions { samplingRate: number; enableOnCreate?: boolean; endpoint?: string; + sessionTimeoutMinutes?: number; + filters?: OnlineEvalFilter[]; } export type RemovableOnlineEvalConfig = RemovableResource; @@ -235,6 +237,8 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive 0 && { filters: options.filters }), }; 
project.onlineEvalConfigs.push(config); diff --git a/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts b/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts index c81160a6c..8fe939991 100644 --- a/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts +++ b/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts @@ -95,6 +95,47 @@ describe('OnlineEvalConfigPrimitive', () => { expect(config.enableOnCreate).toBeUndefined(); }); + it('stores sessionTimeoutMinutes and filters when provided', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + const result = await primitive.add({ + name: 'WithTimeoutAndFilters', + agent: 'MyAgent', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 10, + sessionTimeoutMinutes: 30, + filters: [ + { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }, + { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }, + ], + }); + + expect(result.success).toBe(true); + const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0]; + expect(config.sessionTimeoutMinutes).toBe(30); + expect(config.filters).toEqual([ + { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }, + { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }, + ]); + }); + + it('omits sessionTimeoutMinutes and filters when not provided', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + await primitive.add({ + name: 'NoTimeoutOrFilters', + agent: 'MyAgent', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 10, + }); + + const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0]; + expect(config.sessionTimeoutMinutes).toBeUndefined(); + expect(config.filters).toBeUndefined(); + }); + it('supports multiple evaluators including ARNs', async () => { 
mockReadProjectSpec.mockResolvedValue(makeProject()); mockWriteProjectSpec.mockResolvedValue(undefined); diff --git a/src/cli/tui/hooks/useCreateOnlineEval.ts b/src/cli/tui/hooks/useCreateOnlineEval.ts index b853fed05..b27fafab3 100644 --- a/src/cli/tui/hooks/useCreateOnlineEval.ts +++ b/src/cli/tui/hooks/useCreateOnlineEval.ts @@ -1,4 +1,5 @@ import { onlineEvalConfigPrimitive } from '../../primitives/registry'; +import type { OnlineEvalFilter } from '../../../schema'; import { withAddTelemetry } from '../../telemetry/cli-command-run.js'; import { useCallback, useEffect, useState } from 'react'; @@ -9,6 +10,7 @@ interface CreateOnlineEvalConfig { evaluators: string[]; samplingRate: number; sessionTimeoutMinutes?: number; + filters?: OnlineEvalFilter[]; enableOnCreate: boolean; } @@ -34,6 +36,7 @@ export function useCreateOnlineEval() { evaluators: config.evaluators, samplingRate: config.samplingRate, ...(config.sessionTimeoutMinutes !== undefined && { sessionTimeoutMinutes: config.sessionTimeoutMinutes }), + ...(config.filters && config.filters.length > 0 && { filters: config.filters }), enableOnCreate: config.enableOnCreate, }) ); diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index fd5fafcf6..768022e7e 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -12,8 +12,14 @@ import { import { HELP_TEXT } from '../../constants'; import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; import { generateUniqueName } from '../../utils'; +import { formatFilter, parseFiltersInput } from './filter-parser'; import type { AddOnlineEvalConfig, EvaluatorItem, RuntimeEndpointEntry } from './types'; -import { DEFAULT_SAMPLING_RATE, ONLINE_EVAL_STEP_LABELS } from './types'; +import { + DEFAULT_SAMPLING_RATE, + DEFAULT_SESSION_TIMEOUT_MINUTES, + ONLINE_EVAL_FILTER_OPERATORS, + ONLINE_EVAL_STEP_LABELS, +} 
from './types'; import { useAddOnlineEvalWizard } from './useAddOnlineEvalWizard'; import { Box, Text } from 'ink'; import React, { useCallback, useEffect, useMemo } from 'react'; @@ -99,6 +105,8 @@ export function AddOnlineEvalScreen({ const isEndpointStep = wizard.step === 'endpoint'; const isEvaluatorsStep = wizard.step === 'evaluators'; const isSamplingRateStep = wizard.step === 'samplingRate'; + const isSessionTimeoutStep = wizard.step === 'sessionTimeout'; + const isFiltersStep = wizard.step === 'filters'; const isEnableOnCreateStep = wizard.step === 'enableOnCreate'; const isConfirmStep = wizard.step === 'confirm'; @@ -230,6 +238,74 @@ export function AddOnlineEvalScreen({ )} + {isSessionTimeoutStep && ( + + + Minutes of inactivity before an agent session is considered complete (1–1440). Leave blank to use the + default of {DEFAULT_SESSION_TIMEOUT_MINUTES}. + + { + const trimmed = value.trim(); + if (trimmed === '') { + wizard.setSessionTimeoutMinutes(undefined); + return; + } + const minutes = parseInt(trimmed, 10); + if (isNaN(minutes) || minutes < 1 || minutes > 1440) return; + wizard.setSessionTimeoutMinutes(minutes); + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const trimmed = value.trim(); + if (trimmed === '') return true; + const minutes = parseInt(trimmed, 10); + if (isNaN(minutes)) return 'Must be an integer or blank'; + if (minutes < 1 || minutes > 1440) return 'Must be between 1 and 1440'; + return true; + }} + /> + + )} + + {isFiltersStep && ( + + + Optional filters that scope which traces are evaluated. Format: {''} {''} {''}, separated + by ";". Operators: {ONLINE_EVAL_FILTER_OPERATORS.join(', ')}. Bare {'`true`'}/{'`false`'} → boolean; bare + numbers → double; quoted {'`"..."`'} or anything else → string. Use quotes to force string type for + numeric-looking IDs or the literal words {'`"true"`'}/{'`"false"`'}. Leave blank for no filters. 
+ + { + const trimmed = value.trim(); + if (trimmed === '') { + wizard.setFilters(undefined); + return; + } + const parsed = parseFiltersInput(trimmed); + if (!parsed) return; + wizard.setFilters(parsed); + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const trimmed = value.trim(); + if (trimmed === '') return true; + const parsed = parseFiltersInput(trimmed); + if (!parsed) + return 'Each filter must be " " with a valid operator (separate with ";")'; + return true; + }} + /> + + )} + {isEnableOnCreateStep && ( 0 + ? effectiveConfig.filters.map(formatFilter).join('; ') + : '(none)', + }, { label: 'Enable on Deploy', value: effectiveConfig.enableOnCreate ? 'Yes' : 'No' }, ]} /> diff --git a/src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts b/src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts new file mode 100644 index 000000000..43f71224d --- /dev/null +++ b/src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts @@ -0,0 +1,138 @@ +import { formatFilter, parseFiltersInput } from '../filter-parser'; +import { describe, expect, it } from 'vitest'; + +describe('parseFiltersInput', () => { + describe('value typing', () => { + it('treats bare true/false as booleans', () => { + const result = parseFiltersInput('success Equals true; failed Equals false'); + expect(result).toEqual([ + { key: 'success', operator: 'Equals', value: { booleanValue: true } }, + { key: 'failed', operator: 'Equals', value: { booleanValue: false } }, + ]); + }); + + it('treats bare integer values as doubles', () => { + const result = parseFiltersInput('latencyMs LessThan 1000'); + expect(result).toEqual([ + { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }, + ]); + }); + + it('treats bare decimal values as doubles', () => { + const result = parseFiltersInput('score GreaterThan -0.5'); + expect(result).toEqual([ + { key: 'score', operator: 'GreaterThan', value: { doubleValue: -0.5 } }, + ]); + }); + + it('treats bare 
non-numeric, non-boolean values as strings', () => { + const result = parseFiltersInput('model Equals claude-3'); + expect(result).toEqual([ + { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }, + ]); + }); + + it('treats double-quoted values as strings even when they look numeric', () => { + const result = parseFiltersInput('id Equals "12345"'); + expect(result).toEqual([{ key: 'id', operator: 'Equals', value: { stringValue: '12345' } }]); + }); + + it('treats double-quoted "true"/"false" as strings, not booleans', () => { + const result = parseFiltersInput('flag Equals "true"'); + expect(result).toEqual([{ key: 'flag', operator: 'Equals', value: { stringValue: 'true' } }]); + }); + + it('treats quoted empty string as an empty stringValue', () => { + const result = parseFiltersInput('note Equals ""'); + expect(result).toEqual([{ key: 'note', operator: 'Equals', value: { stringValue: '' } }]); + }); + + it('preserves multi-word bare values verbatim', () => { + const result = parseFiltersInput('label Contains hello world'); + expect(result).toEqual([ + { key: 'label', operator: 'Contains', value: { stringValue: 'hello world' } }, + ]); + }); + }); + + describe('operator handling', () => { + it('accepts every supported operator', () => { + const operators = [ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', + ]; + for (const op of operators) { + const result = parseFiltersInput(`k ${op} v`); + expect(result, `operator ${op}`).toEqual([ + { key: 'k', operator: op, value: { stringValue: 'v' } }, + ]); + } + }); + + it('returns undefined when the operator is unknown', () => { + expect(parseFiltersInput('k FooBar v')).toBeUndefined(); + }); + + it('is case-sensitive on operator names', () => { + expect(parseFiltersInput('k equals v')).toBeUndefined(); + }); + }); + + describe('segment splitting', () => { + it('splits on ";" and trims each segment', () => { + const 
result = parseFiltersInput(' a Equals 1 ; b NotEquals 2 '); + expect(result).toEqual([ + { key: 'a', operator: 'Equals', value: { doubleValue: 1 } }, + { key: 'b', operator: 'NotEquals', value: { doubleValue: 2 } }, + ]); + }); + + it('ignores empty segments produced by trailing or doubled ";"', () => { + const result = parseFiltersInput('a Equals 1;;b Equals 2;'); + expect(result).toHaveLength(2); + }); + + it('returns undefined when input has no non-empty segments', () => { + expect(parseFiltersInput('')).toBeUndefined(); + expect(parseFiltersInput(' ;; ')).toBeUndefined(); + }); + }); + + describe('failure modes', () => { + it('returns undefined when a segment has fewer than three parts', () => { + expect(parseFiltersInput('onlykey')).toBeUndefined(); + expect(parseFiltersInput('key Equals')).toBeUndefined(); + }); + + it('returns undefined when any segment in a list is malformed', () => { + // First segment is fine; second is missing the value. + expect(parseFiltersInput('a Equals 1; b Equals')).toBeUndefined(); + }); + }); +}); + +describe('formatFilter', () => { + it('renders string values', () => { + expect( + formatFilter({ key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }) + ).toBe('model Equals claude-3'); + }); + + it('renders double values', () => { + expect( + formatFilter({ key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }) + ).toBe('latencyMs LessThan 1000'); + }); + + it('renders boolean values', () => { + expect( + formatFilter({ key: 'success', operator: 'Equals', value: { booleanValue: true } }) + ).toBe('success Equals true'); + }); +}); diff --git a/src/cli/tui/screens/online-eval/filter-parser.ts b/src/cli/tui/screens/online-eval/filter-parser.ts new file mode 100644 index 000000000..9c7062e55 --- /dev/null +++ b/src/cli/tui/screens/online-eval/filter-parser.ts @@ -0,0 +1,81 @@ +// ────────────────────────────────────────────────────────────────────────────── +// Filter parsing helpers for the 
/**
 * Render a single filter as a "<key> <operator> <value>" string for display
 * (the wizard's confirm screen uses it).
 *
 * String values are emitted without quotes, so the output is not guaranteed
 * to parse back into the same filter via `parseFiltersInput`.
 *
 * @param f - Filter to render.
 * @returns The rendered filter; the value part is empty when no value variant is set.
 */
export function formatFilter(f: OnlineEvalFilter): string {
  const { stringValue, doubleValue, booleanValue } = f.value;
  let rendered = '';
  if (stringValue !== undefined) {
    rendered = stringValue;
  } else if (doubleValue !== undefined) {
    rendered = String(doubleValue);
  } else if (booleanValue !== undefined) {
    rendered = String(booleanValue);
  }
  return `${f.key} ${f.operator} ${rendered}`;
}

/**
 * Split raw filter input into trimmed, non-empty segments.
 *
 * Segments are separated by ";", except that a ";" enclosed in a balanced pair
 * of double quotes belongs to the quoted value and does not split. A `"` that
 * has no closing partner is treated as an ordinary character, so input with an
 * unbalanced quote is split on every ";" that follows it.
 */
function splitFilterSegments(input: string): string[] {
  // Alternatives, in priority order: a complete "..." run (may contain ";"),
  // any character that is neither ";" nor `"`, or a lone unmatched `"`.
  const chunks = input.match(/(?:"[^"]*"|[^;"]|")+/g) ?? [];
  return chunks.map(chunk => chunk.trim()).filter(chunk => chunk.length > 0);
}

/**
 * Parse a filter input string such as:
 *   'model Equals claude-3; latencyMs LessThan 1000; success Equals true'
 *   'id Equals "12345"; note Contains "a; b"'
 *
 * Each segment is "<key> <operator> <value>". Segments are separated by ";";
 * a ";" inside a double-quoted value is part of that value.
 *
 * Value typing rules:
 *   - Double-quoted value (e.g. "12345") → stringValue (quotes stripped)
 *   - Bare `true` / `false`              → booleanValue
 *   - Bare numeric (`-?\d+(\.\d+)?`)     → doubleValue
 *   - Anything else                      → stringValue
 *
 * @param input - Raw text typed by the user.
 * @returns The parsed filters, or undefined when any segment is malformed,
 *   uses an operator not listed in ONLINE_EVAL_FILTER_OPERATORS, or the input
 *   has no non-empty segments.
 */
export function parseFiltersInput(input: string): OnlineEvalFilter[] | undefined {
  const segments = splitFilterSegments(input);
  if (segments.length === 0) return undefined;

  const filters: OnlineEvalFilter[] = [];
  for (const segment of segments) {
    // <key> <operator> <value>, where <value> is either one double-quoted
    // string (no embedded quotes) or a bare token sequence up to the end.
    const match = /^(\S+)\s+(\S+)\s+(?:"([^"]*)"|(.+?))\s*$/.exec(segment);
    if (!match) return undefined;

    const key = match[1]!;
    const operator = match[2] as OnlineEvalFilterOperator;
    if (!ONLINE_EVAL_FILTER_OPERATORS.includes(operator)) return undefined;

    const quoted = match[3];
    const bare = match[4];

    let value: OnlineEvalFilter['value'];
    if (quoted !== undefined) {
      // Explicit string — keeps "true", "false" and "12345" as strings.
      value = { stringValue: quoted };
    } else {
      const rawValue = bare!;
      if (rawValue === 'true' || rawValue === 'false') {
        value = { booleanValue: rawValue === 'true' };
      } else if (/^-?\d+(\.\d+)?$/.test(rawValue)) {
        value = { doubleValue: parseFloat(rawValue) };
      } else {
        value = { stringValue: rawValue };
      }
    }
    filters.push({ key, operator, value });
  }
  return filters;
}
AddOnlineEvalStep = | 'name' @@ -8,6 +10,8 @@ export type AddOnlineEvalStep = | 'endpoint' | 'evaluators' | 'samplingRate' + | 'sessionTimeout' + | 'filters' | 'enableOnCreate' | 'confirm'; @@ -17,6 +21,8 @@ export interface AddOnlineEvalConfig { endpoint?: string; evaluators: string[]; samplingRate: number; + sessionTimeoutMinutes?: number; + filters?: OnlineEvalFilter[]; enableOnCreate: boolean; description?: string; } @@ -33,13 +39,27 @@ export const ONLINE_EVAL_STEP_LABELS: Record = { endpoint: 'Endpoint', evaluators: 'Evaluators', samplingRate: 'Rate', + sessionTimeout: 'Timeout', + filters: 'Filters', enableOnCreate: 'Enable', confirm: 'Confirm', }; -// ───────────────────────────────────────────────────────────────────────────── +/** Filter operators offered in the wizard. */ +export const ONLINE_EVAL_FILTER_OPERATORS: OnlineEvalFilterOperator[] = [ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', +]; + +// ────────────────────────────────────────────────────────────────────────────── // Evaluator Items (fetched from API) -// ───────────────────────────────────────────────────────────────────────────── +// ────────────────────────────────────────────────────────────────────────────── export interface EvaluatorItem { /** ARN used as the stored identifier in the config */ @@ -53,3 +73,4 @@ export interface EvaluatorItem { } export const DEFAULT_SAMPLING_RATE = 10; +export const DEFAULT_SESSION_TIMEOUT_MINUTES = 5; diff --git a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts index 239a95edc..ea562fda2 100644 --- a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts +++ b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts @@ -1,3 +1,4 @@ +import type { OnlineEvalFilter } from '../../../../schema'; import type { AddOnlineEvalConfig, AddOnlineEvalStep } from './types'; import { DEFAULT_SAMPLING_RATE 
} from './types'; import { useCallback, useRef, useState } from 'react'; @@ -5,9 +6,19 @@ import { useCallback, useRef, useState } from 'react'; function getAllSteps(agentCount: number): AddOnlineEvalStep[] { if (agentCount <= 1) { // endpoint step is included but will be skipped dynamically when no endpoints exist - return ['name', 'endpoint', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; + return ['name', 'endpoint', 'evaluators', 'samplingRate', 'sessionTimeout', 'filters', 'enableOnCreate', 'confirm']; } - return ['name', 'agent', 'endpoint', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; + return [ + 'name', + 'agent', + 'endpoint', + 'evaluators', + 'samplingRate', + 'sessionTimeout', + 'filters', + 'enableOnCreate', + 'confirm', + ]; } function getDefaultConfig(): AddOnlineEvalConfig { @@ -17,6 +28,8 @@ function getDefaultConfig(): AddOnlineEvalConfig { endpoint: undefined, evaluators: [], samplingRate: DEFAULT_SAMPLING_RATE, + sessionTimeoutMinutes: undefined, + filters: undefined, enableOnCreate: true, }; } @@ -102,6 +115,24 @@ export function useAddOnlineEvalWizard(agentCount: number) { [nextStep, setConfig, setStep] ); + const setSessionTimeoutMinutes = useCallback( + (sessionTimeoutMinutes: number | undefined) => { + setConfig(c => ({ ...c, sessionTimeoutMinutes })); + const next = nextStep('sessionTimeout'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setFilters = useCallback( + (filters: OnlineEvalFilter[] | undefined) => { + setConfig(c => ({ ...c, filters: filters && filters.length > 0 ? 
filters : undefined })); + const next = nextStep('filters'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + const setEnableOnCreate = useCallback( (enableOnCreate: boolean) => { setConfig(c => ({ ...c, enableOnCreate })); @@ -128,6 +159,8 @@ export function useAddOnlineEvalWizard(agentCount: number) { setEndpoint, setEvaluators, setSamplingRate, + setSessionTimeoutMinutes, + setFilters, setEnableOnCreate, reset, }; diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts index 10d164a2c..ca367a32c 100644 --- a/src/schema/schemas/agentcore-project.ts +++ b/src/schema/schemas/agentcore-project.ts @@ -34,7 +34,7 @@ export { }; export { EvaluationLevelSchema }; export type { MemoryStrategy, MemoryStrategyType } from './primitives/memory'; -export type { OnlineEvalConfig } from './primitives/online-eval-config'; +export type { OnlineEvalConfig, OnlineEvalFilter, OnlineEvalFilterOperator, OnlineEvalFilterValue } from './primitives/online-eval-config'; export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './primitives/online-eval-config'; export type { CodeBasedConfig, diff --git a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts index e6e940948..e60b19955 100644 --- a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts +++ b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts @@ -98,4 +98,137 @@ describe('OnlineEvalConfigSchema', () => { it('accepts config without description and enableOnCreate', () => { expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); }); + + describe('sessionTimeoutMinutes', () => { + it('accepts boundary value 1', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 1 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts boundary value 1440', () => { + const config = { 
...validConfig, sessionTimeoutMinutes: 1440 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('rejects 0', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 0 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects 1441', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 1441 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects non-integer values', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 5.5 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('omitting the field is valid (uses construct default)', () => { + expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); + }); + }); + + describe('filters', () => { + const baseFilter = { + key: 'model', + operator: 'Equals' as const, + value: { stringValue: 'claude-3' }, + }; + + it('accepts a single valid filter with a stringValue', () => { + const config = { ...validConfig, filters: [baseFilter] }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts filters with doubleValue and booleanValue', () => { + const config = { + ...validConfig, + filters: [ + { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }, + { key: 'success', operator: 'Equals', value: { booleanValue: true } }, + ], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts every supported operator', () => { + const operators = [ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', + ] as const; + for (const op of operators) { + const config = { ...validConfig, filters: [{ ...baseFilter, operator: op }] }; + expect(OnlineEvalConfigSchema.safeParse(config).success, `operator ${op}`).toBe(true); + } + }); + + it('rejects an invalid operator', () => { + 
const config = { + ...validConfig, + filters: [{ ...baseFilter, operator: 'StartsWith' }], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with zero value variants set', () => { + const config = { + ...validConfig, + filters: [{ key: 'model', operator: 'Equals', value: {} }], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with two value variants set simultaneously', () => { + const config = { + ...validConfig, + filters: [ + { + key: 'model', + operator: 'Equals', + value: { stringValue: 'claude-3', doubleValue: 1 }, + }, + ], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with all three value variants set', () => { + const config = { + ...validConfig, + filters: [ + { + key: 'model', + operator: 'Equals', + value: { stringValue: 'x', doubleValue: 1, booleanValue: true }, + }, + ], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with an empty key', () => { + const config = { + ...validConfig, + filters: [{ ...baseFilter, key: '' }], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('omitting the field is valid', () => { + expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); + }); + + it('accepts an empty filters array', () => { + const config = { ...validConfig, filters: [] }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + }); }); diff --git a/src/schema/schemas/primitives/index.ts b/src/schema/schemas/primitives/index.ts index a48985c84..f30ac5722 100644 --- a/src/schema/schemas/primitives/index.ts +++ b/src/schema/schemas/primitives/index.ts @@ -54,7 +54,7 @@ export { RatingScaleSchema, } from './evaluator'; -export type { OnlineEvalConfig } from './online-eval-config'; +export type { OnlineEvalConfig, OnlineEvalFilter, 
OnlineEvalFilterOperator, OnlineEvalFilterValue } from './online-eval-config'; export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './online-eval-config'; export type { Policy, PolicyEngine, ValidationMode } from './policy'; diff --git a/src/schema/schemas/primitives/online-eval-config.ts b/src/schema/schemas/primitives/online-eval-config.ts index 5b6f13cb6..9bbd1ccb2 100644 --- a/src/schema/schemas/primitives/online-eval-config.ts +++ b/src/schema/schemas/primitives/online-eval-config.ts @@ -14,6 +14,36 @@ export const OnlineEvalConfigNameSchema = z 'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)' ); +/** Operators supported by online evaluation config filters. */ +export const OnlineEvalFilterOperatorSchema = z.enum([ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', +]); + +/** Value for a single filter — exactly one of stringValue/doubleValue/booleanValue should be set. */ +export const OnlineEvalFilterValueSchema = z + .object({ + stringValue: z.string().optional(), + doubleValue: z.number().optional(), + booleanValue: z.boolean().optional(), + }) + .refine( + v => [v.stringValue, v.doubleValue, v.booleanValue].filter(x => x !== undefined).length === 1, + 'Exactly one of stringValue, doubleValue, or booleanValue must be set' + ); + +export const OnlineEvalFilterSchema = z.object({ + key: z.string().min(1), + operator: OnlineEvalFilterOperatorSchema, + value: OnlineEvalFilterValueSchema, +}); + export const OnlineEvalConfigSchema = z.object({ name: OnlineEvalConfigNameSchema, /** Agent name to monitor (must match a project agent) */ @@ -26,9 +56,16 @@ export const OnlineEvalConfigSchema = z.object({ samplingRate: z.number().min(0.01).max(100), /** Optional description for the online eval config */ description: z.string().max(200).optional(), + /** Session idle timeout in minutes (1-1440). 
Default: 5 */ + sessionTimeoutMinutes: z.number().int().min(1).max(1440).optional(), + /** Optional filters that scope which agent traces are evaluated. */ + filters: z.array(OnlineEvalFilterSchema).optional(), /** Whether to enable execution on create (default: true) */ enableOnCreate: z.boolean().optional(), tags: TagsSchema.optional(), }); export type OnlineEvalConfig = z.infer; +export type OnlineEvalFilter = z.infer; +export type OnlineEvalFilterOperator = z.infer; +export type OnlineEvalFilterValue = z.infer;