From f261cd5e65ca199170574ec508bb996462ec4d35 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 7 May 2026 18:37:12 +0000 Subject: [PATCH 1/6] fix: add sessionTimeoutMinutes and filters to online eval config (#906) --- .../primitives/OnlineEvalConfigPrimitive.ts | 6 +- src/cli/tui/hooks/useCreateOnlineEval.ts | 3 + .../online-eval/AddOnlineEvalScreen.tsx | 141 +++++++++++++++++- src/cli/tui/screens/online-eval/types.ts | 29 +++- .../online-eval/useAddOnlineEvalWizard.ts | 37 ++++- src/schema/schemas/agentcore-project.ts | 2 +- src/schema/schemas/primitives/index.ts | 2 +- .../schemas/primitives/online-eval-config.ts | 37 +++++ 8 files changed, 247 insertions(+), 10 deletions(-) diff --git a/src/cli/primitives/OnlineEvalConfigPrimitive.ts b/src/cli/primitives/OnlineEvalConfigPrimitive.ts index 6e4436ac9..477de5b45 100644 --- a/src/cli/primitives/OnlineEvalConfigPrimitive.ts +++ b/src/cli/primitives/OnlineEvalConfigPrimitive.ts @@ -1,5 +1,5 @@ import { findConfigRoot } from '../../lib'; -import type { OnlineEvalConfig } from '../../schema'; +import type { OnlineEvalConfig, OnlineEvalFilter } from '../../schema'; import { OnlineEvalConfigSchema } from '../../schema'; import { getErrorMessage } from '../errors'; import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types'; @@ -16,6 +16,8 @@ export interface AddOnlineEvalConfigOptions { samplingRate: number; enableOnCreate?: boolean; endpoint?: string; + sessionTimeoutMinutes?: number; + filters?: OnlineEvalFilter[]; } export type RemovableOnlineEvalConfig = RemovableResource; @@ -235,6 +237,8 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive 0 && { filters: options.filters }), }; project.onlineEvalConfigs.push(config); diff --git a/src/cli/tui/hooks/useCreateOnlineEval.ts b/src/cli/tui/hooks/useCreateOnlineEval.ts index b853fed05..b27fafab3 100644 --- a/src/cli/tui/hooks/useCreateOnlineEval.ts +++ b/src/cli/tui/hooks/useCreateOnlineEval.ts @@ -1,4 +1,5 @@ import { 
onlineEvalConfigPrimitive } from '../../primitives/registry'; +import type { OnlineEvalFilter } from '../../../schema'; import { withAddTelemetry } from '../../telemetry/cli-command-run.js'; import { useCallback, useEffect, useState } from 'react'; @@ -9,6 +10,7 @@ interface CreateOnlineEvalConfig { evaluators: string[]; samplingRate: number; sessionTimeoutMinutes?: number; + filters?: OnlineEvalFilter[]; enableOnCreate: boolean; } @@ -34,6 +36,7 @@ export function useCreateOnlineEval() { evaluators: config.evaluators, samplingRate: config.samplingRate, ...(config.sessionTimeoutMinutes !== undefined && { sessionTimeoutMinutes: config.sessionTimeoutMinutes }), + ...(config.filters && config.filters.length > 0 && { filters: config.filters }), enableOnCreate: config.enableOnCreate, }) ); diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index fd5fafcf6..cdfd0de06 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -1,4 +1,5 @@ import { OnlineEvalConfigNameSchema } from '../../../../schema'; +import type { OnlineEvalFilter, OnlineEvalFilterOperator } from '../../../../schema'; import type { SelectableItem } from '../../components'; import { ConfirmReview, @@ -13,7 +14,12 @@ import { HELP_TEXT } from '../../constants'; import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; import { generateUniqueName } from '../../utils'; import type { AddOnlineEvalConfig, EvaluatorItem, RuntimeEndpointEntry } from './types'; -import { DEFAULT_SAMPLING_RATE, ONLINE_EVAL_STEP_LABELS } from './types'; +import { + DEFAULT_SAMPLING_RATE, + DEFAULT_SESSION_TIMEOUT_MINUTES, + ONLINE_EVAL_FILTER_OPERATORS, + ONLINE_EVAL_STEP_LABELS, +} from './types'; import { useAddOnlineEvalWizard } from './useAddOnlineEvalWizard'; import { Box, Text } from 'ink'; import React, { useCallback, useEffect, useMemo } from 'react'; @@ 
-99,6 +105,8 @@ export function AddOnlineEvalScreen({ const isEndpointStep = wizard.step === 'endpoint'; const isEvaluatorsStep = wizard.step === 'evaluators'; const isSamplingRateStep = wizard.step === 'samplingRate'; + const isSessionTimeoutStep = wizard.step === 'sessionTimeout'; + const isFiltersStep = wizard.step === 'filters'; const isEnableOnCreateStep = wizard.step === 'enableOnCreate'; const isConfirmStep = wizard.step === 'confirm'; @@ -230,6 +238,73 @@ export function AddOnlineEvalScreen({ )} + {isSessionTimeoutStep && ( + + + Minutes of inactivity before an agent session is considered complete (1–1440). Leave blank to use the + default of {DEFAULT_SESSION_TIMEOUT_MINUTES}. + + { + const trimmed = value.trim(); + if (trimmed === '') { + wizard.setSessionTimeoutMinutes(undefined); + return; + } + const minutes = parseInt(trimmed, 10); + if (isNaN(minutes) || minutes < 1 || minutes > 1440) return; + wizard.setSessionTimeoutMinutes(minutes); + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const trimmed = value.trim(); + if (trimmed === '') return true; + const minutes = parseInt(trimmed, 10); + if (isNaN(minutes)) return 'Must be an integer or blank'; + if (minutes < 1 || minutes > 1440) return 'Must be between 1 and 1440'; + return true; + }} + /> + + )} + + {isFiltersStep && ( + + + Optional filters that scope which traces are evaluated. Format: {''} {''} {''}, separated + by ";". Operators: {ONLINE_EVAL_FILTER_OPERATORS.join(', ')}. Values are parsed as boolean (true/false), + number, or string. Leave blank for no filters. 
+ + { + const trimmed = value.trim(); + if (trimmed === '') { + wizard.setFilters(undefined); + return; + } + const parsed = parseFiltersInput(trimmed); + if (!parsed) return; + wizard.setFilters(parsed); + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const trimmed = value.trim(); + if (trimmed === '') return true; + const parsed = parseFiltersInput(trimmed); + if (!parsed) + return 'Each filter must be " " with a valid operator (separate with ";")'; + return true; + }} + /> + + )} + {isEnableOnCreateStep && ( 0 + ? effectiveConfig.filters.map(formatFilter).join('; ') + : '(none)', + }, { label: 'Enable on Deploy', value: effectiveConfig.enableOnCreate ? 'Yes' : 'No' }, ]} /> @@ -255,3 +344,53 @@ export function AddOnlineEvalScreen({ ); } + +// ────────────────────────────────────────────────────────────────────────────── +// Filter parsing helpers +// ────────────────────────────────────────────────────────────────────────────── + +function formatFilter(f: OnlineEvalFilter): string { + const v = + f.value.stringValue !== undefined + ? f.value.stringValue + : f.value.doubleValue !== undefined + ? String(f.value.doubleValue) + : f.value.booleanValue !== undefined + ? String(f.value.booleanValue) + : ''; + return `${f.key} ${f.operator} ${v}`; +} + +/** + * Parse a filter input string such as: + * "model Equals claude-3; latencyMs LessThan 1000; success Equals true" + * Returns undefined if any segment is malformed. 
+ */ +function parseFiltersInput(input: string): OnlineEvalFilter[] | undefined { + const segments = input + .split(';') + .map(s => s.trim()) + .filter(s => s.length > 0); + if (segments.length === 0) return undefined; + + const filters: OnlineEvalFilter[] = []; + for (const segment of segments) { + const parts = segment.split(/\s+/); + if (parts.length < 3) return undefined; + const key = parts[0]!; + const operator = parts[1] as OnlineEvalFilterOperator; + if (!ONLINE_EVAL_FILTER_OPERATORS.includes(operator)) return undefined; + const rawValue = parts.slice(2).join(' '); + + let value: OnlineEvalFilter['value']; + if (rawValue === 'true' || rawValue === 'false') { + value = { booleanValue: rawValue === 'true' }; + } else if (/^-?\d+(\.\d+)?$/.test(rawValue)) { + value = { doubleValue: parseFloat(rawValue) }; + } else { + value = { stringValue: rawValue }; + } + filters.push({ key, operator, value }); + } + return filters; +} diff --git a/src/cli/tui/screens/online-eval/types.ts b/src/cli/tui/screens/online-eval/types.ts index 1a1e5940c..7d271967c 100644 --- a/src/cli/tui/screens/online-eval/types.ts +++ b/src/cli/tui/screens/online-eval/types.ts @@ -1,6 +1,8 @@ -// ───────────────────────────────────────────────────────────────────────────── +// ────────────────────────────────────────────────────────────────────────────── // Online Eval Config Flow Types -// ───────────────────────────────────────────────────────────────────────────── +// ────────────────────────────────────────────────────────────────────────────── + +import type { OnlineEvalFilter, OnlineEvalFilterOperator } from '../../../../schema'; export type AddOnlineEvalStep = | 'name' @@ -8,6 +10,8 @@ export type AddOnlineEvalStep = | 'endpoint' | 'evaluators' | 'samplingRate' + | 'sessionTimeout' + | 'filters' | 'enableOnCreate' | 'confirm'; @@ -17,6 +21,8 @@ export interface AddOnlineEvalConfig { endpoint?: string; evaluators: string[]; samplingRate: number; + sessionTimeoutMinutes?: number; + 
filters?: OnlineEvalFilter[]; enableOnCreate: boolean; description?: string; } @@ -33,13 +39,27 @@ export const ONLINE_EVAL_STEP_LABELS: Record = { endpoint: 'Endpoint', evaluators: 'Evaluators', samplingRate: 'Rate', + sessionTimeout: 'Timeout', + filters: 'Filters', enableOnCreate: 'Enable', confirm: 'Confirm', }; -// ───────────────────────────────────────────────────────────────────────────── +/** Filter operators offered in the wizard. */ +export const ONLINE_EVAL_FILTER_OPERATORS: OnlineEvalFilterOperator[] = [ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', +]; + +// ────────────────────────────────────────────────────────────────────────────── // Evaluator Items (fetched from API) -// ───────────────────────────────────────────────────────────────────────────── +// ────────────────────────────────────────────────────────────────────────────── export interface EvaluatorItem { /** ARN used as the stored identifier in the config */ @@ -53,3 +73,4 @@ export interface EvaluatorItem { } export const DEFAULT_SAMPLING_RATE = 10; +export const DEFAULT_SESSION_TIMEOUT_MINUTES = 5; diff --git a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts index 239a95edc..ea562fda2 100644 --- a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts +++ b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts @@ -1,3 +1,4 @@ +import type { OnlineEvalFilter } from '../../../../schema'; import type { AddOnlineEvalConfig, AddOnlineEvalStep } from './types'; import { DEFAULT_SAMPLING_RATE } from './types'; import { useCallback, useRef, useState } from 'react'; @@ -5,9 +6,19 @@ import { useCallback, useRef, useState } from 'react'; function getAllSteps(agentCount: number): AddOnlineEvalStep[] { if (agentCount <= 1) { // endpoint step is included but will be skipped dynamically when no endpoints exist - return ['name', 
'endpoint', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; + return ['name', 'endpoint', 'evaluators', 'samplingRate', 'sessionTimeout', 'filters', 'enableOnCreate', 'confirm']; } - return ['name', 'agent', 'endpoint', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; + return [ + 'name', + 'agent', + 'endpoint', + 'evaluators', + 'samplingRate', + 'sessionTimeout', + 'filters', + 'enableOnCreate', + 'confirm', + ]; } function getDefaultConfig(): AddOnlineEvalConfig { @@ -17,6 +28,8 @@ function getDefaultConfig(): AddOnlineEvalConfig { endpoint: undefined, evaluators: [], samplingRate: DEFAULT_SAMPLING_RATE, + sessionTimeoutMinutes: undefined, + filters: undefined, enableOnCreate: true, }; } @@ -102,6 +115,24 @@ export function useAddOnlineEvalWizard(agentCount: number) { [nextStep, setConfig, setStep] ); + const setSessionTimeoutMinutes = useCallback( + (sessionTimeoutMinutes: number | undefined) => { + setConfig(c => ({ ...c, sessionTimeoutMinutes })); + const next = nextStep('sessionTimeout'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setFilters = useCallback( + (filters: OnlineEvalFilter[] | undefined) => { + setConfig(c => ({ ...c, filters: filters && filters.length > 0 ? 
filters : undefined })); + const next = nextStep('filters'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + const setEnableOnCreate = useCallback( (enableOnCreate: boolean) => { setConfig(c => ({ ...c, enableOnCreate })); @@ -128,6 +159,8 @@ export function useAddOnlineEvalWizard(agentCount: number) { setEndpoint, setEvaluators, setSamplingRate, + setSessionTimeoutMinutes, + setFilters, setEnableOnCreate, reset, }; diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts index 10d164a2c..ca367a32c 100644 --- a/src/schema/schemas/agentcore-project.ts +++ b/src/schema/schemas/agentcore-project.ts @@ -34,7 +34,7 @@ export { }; export { EvaluationLevelSchema }; export type { MemoryStrategy, MemoryStrategyType } from './primitives/memory'; -export type { OnlineEvalConfig } from './primitives/online-eval-config'; +export type { OnlineEvalConfig, OnlineEvalFilter, OnlineEvalFilterOperator, OnlineEvalFilterValue } from './primitives/online-eval-config'; export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './primitives/online-eval-config'; export type { CodeBasedConfig, diff --git a/src/schema/schemas/primitives/index.ts b/src/schema/schemas/primitives/index.ts index a48985c84..f30ac5722 100644 --- a/src/schema/schemas/primitives/index.ts +++ b/src/schema/schemas/primitives/index.ts @@ -54,7 +54,7 @@ export { RatingScaleSchema, } from './evaluator'; -export type { OnlineEvalConfig } from './online-eval-config'; +export type { OnlineEvalConfig, OnlineEvalFilter, OnlineEvalFilterOperator, OnlineEvalFilterValue } from './online-eval-config'; export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './online-eval-config'; export type { Policy, PolicyEngine, ValidationMode } from './policy'; diff --git a/src/schema/schemas/primitives/online-eval-config.ts b/src/schema/schemas/primitives/online-eval-config.ts index 5b6f13cb6..9bbd1ccb2 100644 --- 
a/src/schema/schemas/primitives/online-eval-config.ts +++ b/src/schema/schemas/primitives/online-eval-config.ts @@ -14,6 +14,36 @@ export const OnlineEvalConfigNameSchema = z 'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)' ); +/** Operators supported by online evaluation config filters. */ +export const OnlineEvalFilterOperatorSchema = z.enum([ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', +]); + +/** Value for a single filter — exactly one of stringValue/doubleValue/booleanValue should be set. */ +export const OnlineEvalFilterValueSchema = z + .object({ + stringValue: z.string().optional(), + doubleValue: z.number().optional(), + booleanValue: z.boolean().optional(), + }) + .refine( + v => [v.stringValue, v.doubleValue, v.booleanValue].filter(x => x !== undefined).length === 1, + 'Exactly one of stringValue, doubleValue, or booleanValue must be set' + ); + +export const OnlineEvalFilterSchema = z.object({ + key: z.string().min(1), + operator: OnlineEvalFilterOperatorSchema, + value: OnlineEvalFilterValueSchema, +}); + export const OnlineEvalConfigSchema = z.object({ name: OnlineEvalConfigNameSchema, /** Agent name to monitor (must match a project agent) */ @@ -26,9 +56,16 @@ export const OnlineEvalConfigSchema = z.object({ samplingRate: z.number().min(0.01).max(100), /** Optional description for the online eval config */ description: z.string().max(200).optional(), + /** Session idle timeout in minutes (1-1440). Default: 5 */ + sessionTimeoutMinutes: z.number().int().min(1).max(1440).optional(), + /** Optional filters that scope which agent traces are evaluated. 
*/ + filters: z.array(OnlineEvalFilterSchema).optional(), /** Whether to enable execution on create (default: true) */ enableOnCreate: z.boolean().optional(), tags: TagsSchema.optional(), }); export type OnlineEvalConfig = z.infer; +export type OnlineEvalFilter = z.infer; +export type OnlineEvalFilterOperator = z.infer; +export type OnlineEvalFilterValue = z.infer; From 5002fbff207c820c7326f7842d30b17eeaa6bcc9 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 7 May 2026 18:39:20 +0000 Subject: [PATCH 2/6] test: cover sessionTimeoutMinutes and filters in OnlineEvalConfigPrimitive (#906) --- .../OnlineEvalConfigPrimitive.test.ts | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts b/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts index c81160a6c..8fe939991 100644 --- a/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts +++ b/src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts @@ -95,6 +95,47 @@ describe('OnlineEvalConfigPrimitive', () => { expect(config.enableOnCreate).toBeUndefined(); }); + it('stores sessionTimeoutMinutes and filters when provided', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + const result = await primitive.add({ + name: 'WithTimeoutAndFilters', + agent: 'MyAgent', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 10, + sessionTimeoutMinutes: 30, + filters: [ + { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }, + { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }, + ], + }); + + expect(result.success).toBe(true); + const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0]; + expect(config.sessionTimeoutMinutes).toBe(30); + expect(config.filters).toEqual([ + { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }, + { key: 'latencyMs', operator: 'LessThan', value: { 
doubleValue: 1000 } }, + ]); + }); + + it('omits sessionTimeoutMinutes and filters when not provided', async () => { + mockReadProjectSpec.mockResolvedValue(makeProject()); + mockWriteProjectSpec.mockResolvedValue(undefined); + + await primitive.add({ + name: 'NoTimeoutOrFilters', + agent: 'MyAgent', + evaluators: ['Builtin.GoalSuccessRate'], + samplingRate: 10, + }); + + const config = mockWriteProjectSpec.mock.calls[0]![0].onlineEvalConfigs[0]; + expect(config.sessionTimeoutMinutes).toBeUndefined(); + expect(config.filters).toBeUndefined(); + }); + it('supports multiple evaluators including ARNs', async () => { mockReadProjectSpec.mockResolvedValue(makeProject()); mockWriteProjectSpec.mockResolvedValue(undefined); From 7954a0fea26a6d78f2818d9e5c7ab98cee57f48f Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 7 May 2026 18:51:23 +0000 Subject: [PATCH 3/6] fix: address reviewer feedback --- .../online-eval/AddOnlineEvalScreen.tsx | 46 +++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index cdfd0de06..dfee528ca 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -275,12 +275,13 @@ export function AddOnlineEvalScreen({ Optional filters that scope which traces are evaluated. Format: {''} {''} {''}, separated - by ";". Operators: {ONLINE_EVAL_FILTER_OPERATORS.join(', ')}. Values are parsed as boolean (true/false), - number, or string. Leave blank for no filters. + by ";". Operators: {ONLINE_EVAL_FILTER_OPERATORS.join(', ')}. Bare {'`true`'}/{'`false`'} → boolean; bare + numbers → double; quoted {'`"..."`'} or anything else → string. Use quotes to force string type for + numeric-looking IDs or the literal words {'`"true"`'}/{'`"false"`'}. Leave blank for no filters. 
{ const trimmed = value.trim(); @@ -363,7 +364,15 @@ function formatFilter(f: OnlineEvalFilter): string { /** * Parse a filter input string such as: - * "model Equals claude-3; latencyMs LessThan 1000; success Equals true" + * 'model Equals claude-3; latencyMs LessThan 1000; success Equals true' + * 'id Equals "12345"; flag Equals "true"' + * + * Value typing rules: + * - Double-quoted value (e.g. "12345") → stringValue (quotes stripped) + * - Bare `true` / `false` → booleanValue + * - Bare numeric (`-?\d+(\.\d+)?`) → doubleValue + * - Anything else → stringValue + * * Returns undefined if any segment is malformed. */ function parseFiltersInput(input: string): OnlineEvalFilter[] | undefined { @@ -375,20 +384,29 @@ function parseFiltersInput(input: string): OnlineEvalFilter[] | undefined { const filters: OnlineEvalFilter[] = []; for (const segment of segments) { - const parts = segment.split(/\s+/); - if (parts.length < 3) return undefined; - const key = parts[0]!; - const operator = parts[1] as OnlineEvalFilterOperator; + // Match: where is either a double-quoted + // string (with no embedded quotes) or a bare token sequence to end of line. + const match = segment.match(/^(\S+)\s+(\S+)\s+(?:"([^"]*)"|(.+?))\s*$/); + if (!match) return undefined; + const key = match[1]!; + const operator = match[2] as OnlineEvalFilterOperator; if (!ONLINE_EVAL_FILTER_OPERATORS.includes(operator)) return undefined; - const rawValue = parts.slice(2).join(' '); + const quoted = match[3]; + const bare = match[4]; let value: OnlineEvalFilter['value']; - if (rawValue === 'true' || rawValue === 'false') { - value = { booleanValue: rawValue === 'true' }; - } else if (/^-?\d+(\.\d+)?$/.test(rawValue)) { - value = { doubleValue: parseFloat(rawValue) }; + if (quoted !== undefined) { + // Explicit string — preserves "true", "false", "12345" as strings. 
+ value = { stringValue: quoted }; } else { - value = { stringValue: rawValue }; + const rawValue = bare!; + if (rawValue === 'true' || rawValue === 'false') { + value = { booleanValue: rawValue === 'true' }; + } else if (/^-?\d+(\.\d+)?$/.test(rawValue)) { + value = { doubleValue: parseFloat(rawValue) }; + } else { + value = { stringValue: rawValue }; + } } filters.push({ key, operator, value }); } From ea6dc9c6f1773effb1c9edcd9adb8a334d8c13b9 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 7 May 2026 18:59:51 +0000 Subject: [PATCH 4/6] fix: address reviewer feedback --- .../online-eval/AddOnlineEvalScreen.tsx | 69 +-------- .../__tests__/filter-parser.test.ts | 138 ++++++++++++++++++ .../tui/screens/online-eval/filter-parser.ts | 81 ++++++++++ .../__tests__/online-eval-config.test.ts | 133 +++++++++++++++++ 4 files changed, 353 insertions(+), 68 deletions(-) create mode 100644 src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts create mode 100644 src/cli/tui/screens/online-eval/filter-parser.ts diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index dfee528ca..7c9a84f5b 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -1,5 +1,4 @@ import { OnlineEvalConfigNameSchema } from '../../../../schema'; -import type { OnlineEvalFilter, OnlineEvalFilterOperator } from '../../../../schema'; import type { SelectableItem } from '../../components'; import { ConfirmReview, @@ -13,6 +12,7 @@ import { import { HELP_TEXT } from '../../constants'; import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; import { generateUniqueName } from '../../utils'; +import { formatFilter, parseFiltersInput } from './filter-parser'; import type { AddOnlineEvalConfig, EvaluatorItem, RuntimeEndpointEntry } from './types'; import { DEFAULT_SAMPLING_RATE, @@ -345,70 +345,3 @@ export function 
AddOnlineEvalScreen({ ); } - -// ────────────────────────────────────────────────────────────────────────────── -// Filter parsing helpers -// ────────────────────────────────────────────────────────────────────────────── - -function formatFilter(f: OnlineEvalFilter): string { - const v = - f.value.stringValue !== undefined - ? f.value.stringValue - : f.value.doubleValue !== undefined - ? String(f.value.doubleValue) - : f.value.booleanValue !== undefined - ? String(f.value.booleanValue) - : ''; - return `${f.key} ${f.operator} ${v}`; -} - -/** - * Parse a filter input string such as: - * 'model Equals claude-3; latencyMs LessThan 1000; success Equals true' - * 'id Equals "12345"; flag Equals "true"' - * - * Value typing rules: - * - Double-quoted value (e.g. "12345") → stringValue (quotes stripped) - * - Bare `true` / `false` → booleanValue - * - Bare numeric (`-?\d+(\.\d+)?`) → doubleValue - * - Anything else → stringValue - * - * Returns undefined if any segment is malformed. - */ -function parseFiltersInput(input: string): OnlineEvalFilter[] | undefined { - const segments = input - .split(';') - .map(s => s.trim()) - .filter(s => s.length > 0); - if (segments.length === 0) return undefined; - - const filters: OnlineEvalFilter[] = []; - for (const segment of segments) { - // Match: where is either a double-quoted - // string (with no embedded quotes) or a bare token sequence to end of line. - const match = segment.match(/^(\S+)\s+(\S+)\s+(?:"([^"]*)"|(.+?))\s*$/); - if (!match) return undefined; - const key = match[1]!; - const operator = match[2] as OnlineEvalFilterOperator; - if (!ONLINE_EVAL_FILTER_OPERATORS.includes(operator)) return undefined; - const quoted = match[3]; - const bare = match[4]; - - let value: OnlineEvalFilter['value']; - if (quoted !== undefined) { - // Explicit string — preserves "true", "false", "12345" as strings. 
- value = { stringValue: quoted }; - } else { - const rawValue = bare!; - if (rawValue === 'true' || rawValue === 'false') { - value = { booleanValue: rawValue === 'true' }; - } else if (/^-?\d+(\.\d+)?$/.test(rawValue)) { - value = { doubleValue: parseFloat(rawValue) }; - } else { - value = { stringValue: rawValue }; - } - } - filters.push({ key, operator, value }); - } - return filters; -} diff --git a/src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts b/src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts new file mode 100644 index 000000000..43f71224d --- /dev/null +++ b/src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts @@ -0,0 +1,138 @@ +import { formatFilter, parseFiltersInput } from '../filter-parser'; +import { describe, expect, it } from 'vitest'; + +describe('parseFiltersInput', () => { + describe('value typing', () => { + it('treats bare true/false as booleans', () => { + const result = parseFiltersInput('success Equals true; failed Equals false'); + expect(result).toEqual([ + { key: 'success', operator: 'Equals', value: { booleanValue: true } }, + { key: 'failed', operator: 'Equals', value: { booleanValue: false } }, + ]); + }); + + it('treats bare integer values as doubles', () => { + const result = parseFiltersInput('latencyMs LessThan 1000'); + expect(result).toEqual([ + { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }, + ]); + }); + + it('treats bare decimal values as doubles', () => { + const result = parseFiltersInput('score GreaterThan -0.5'); + expect(result).toEqual([ + { key: 'score', operator: 'GreaterThan', value: { doubleValue: -0.5 } }, + ]); + }); + + it('treats bare non-numeric, non-boolean values as strings', () => { + const result = parseFiltersInput('model Equals claude-3'); + expect(result).toEqual([ + { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }, + ]); + }); + + it('treats double-quoted values as strings even when they look numeric', () => { + 
const result = parseFiltersInput('id Equals "12345"'); + expect(result).toEqual([{ key: 'id', operator: 'Equals', value: { stringValue: '12345' } }]); + }); + + it('treats double-quoted "true"/"false" as strings, not booleans', () => { + const result = parseFiltersInput('flag Equals "true"'); + expect(result).toEqual([{ key: 'flag', operator: 'Equals', value: { stringValue: 'true' } }]); + }); + + it('treats quoted empty string as an empty stringValue', () => { + const result = parseFiltersInput('note Equals ""'); + expect(result).toEqual([{ key: 'note', operator: 'Equals', value: { stringValue: '' } }]); + }); + + it('preserves multi-word bare values verbatim', () => { + const result = parseFiltersInput('label Contains hello world'); + expect(result).toEqual([ + { key: 'label', operator: 'Contains', value: { stringValue: 'hello world' } }, + ]); + }); + }); + + describe('operator handling', () => { + it('accepts every supported operator', () => { + const operators = [ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', + ]; + for (const op of operators) { + const result = parseFiltersInput(`k ${op} v`); + expect(result, `operator ${op}`).toEqual([ + { key: 'k', operator: op, value: { stringValue: 'v' } }, + ]); + } + }); + + it('returns undefined when the operator is unknown', () => { + expect(parseFiltersInput('k FooBar v')).toBeUndefined(); + }); + + it('is case-sensitive on operator names', () => { + expect(parseFiltersInput('k equals v')).toBeUndefined(); + }); + }); + + describe('segment splitting', () => { + it('splits on ";" and trims each segment', () => { + const result = parseFiltersInput(' a Equals 1 ; b NotEquals 2 '); + expect(result).toEqual([ + { key: 'a', operator: 'Equals', value: { doubleValue: 1 } }, + { key: 'b', operator: 'NotEquals', value: { doubleValue: 2 } }, + ]); + }); + + it('ignores empty segments produced by trailing or doubled ";"', () => { + const 
result = parseFiltersInput('a Equals 1;;b Equals 2;'); + expect(result).toHaveLength(2); + }); + + it('returns undefined when input has no non-empty segments', () => { + expect(parseFiltersInput('')).toBeUndefined(); + expect(parseFiltersInput(' ;; ')).toBeUndefined(); + }); + }); + + describe('failure modes', () => { + it('returns undefined when a segment has fewer than three parts', () => { + expect(parseFiltersInput('onlykey')).toBeUndefined(); + expect(parseFiltersInput('key Equals')).toBeUndefined(); + }); + + it('returns undefined when any segment in a list is malformed', () => { + // First segment is fine; second is missing the value. + expect(parseFiltersInput('a Equals 1; b Equals')).toBeUndefined(); + }); + }); +}); + +describe('formatFilter', () => { + it('renders string values', () => { + expect( + formatFilter({ key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } }) + ).toBe('model Equals claude-3'); + }); + + it('renders double values', () => { + expect( + formatFilter({ key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }) + ).toBe('latencyMs LessThan 1000'); + }); + + it('renders boolean values', () => { + expect( + formatFilter({ key: 'success', operator: 'Equals', value: { booleanValue: true } }) + ).toBe('success Equals true'); + }); +}); diff --git a/src/cli/tui/screens/online-eval/filter-parser.ts b/src/cli/tui/screens/online-eval/filter-parser.ts new file mode 100644 index 000000000..9c7062e55 --- /dev/null +++ b/src/cli/tui/screens/online-eval/filter-parser.ts @@ -0,0 +1,81 @@ +// ────────────────────────────────────────────────────────────────────────────── +// Filter parsing helpers for the online-eval `agentcore add` wizard. +// +// Kept as a sibling module (rather than co-located with the React component) +// so it is independently unit-testable. 
+// ────────────────────────────────────────────────────────────────────────────── + +import type { OnlineEvalFilter, OnlineEvalFilterOperator } from '../../../../schema'; +import { ONLINE_EVAL_FILTER_OPERATORS } from './types'; + +/** + * Render a single filter as the canonical "<key> <operator> <value>" string + * shown in the confirm screen. Quoting is *not* re-applied to string values + * because this is for display only; round-trip through the parser is not the + * goal here. + */ +export function formatFilter(f: OnlineEvalFilter): string { + const v = + f.value.stringValue !== undefined + ? f.value.stringValue + : f.value.doubleValue !== undefined + ? String(f.value.doubleValue) + : f.value.booleanValue !== undefined + ? String(f.value.booleanValue) + : ''; + return `${f.key} ${f.operator} ${v}`; +} + +/** + * Parse a filter input string such as: + * 'model Equals claude-3; latencyMs LessThan 1000; success Equals true' + * 'id Equals "12345"; flag Equals "true"' + * + * Value typing rules: + * - Double-quoted value (e.g. "12345") → stringValue (quotes stripped) + * - Bare `true` / `false` → booleanValue + * - Bare numeric (`-?\d+(\.\d+)?`) → doubleValue + * - Anything else → stringValue + * + * Returns undefined if any segment is malformed or contains an unknown operator. + * An empty input (no non-empty segments after splitting on `;`) also returns + * undefined so callers can distinguish "user cleared the field" from "user + * supplied at least one filter". + */ +export function parseFiltersInput(input: string): OnlineEvalFilter[] | undefined { + const segments = input + .split(';') + .map(s => s.trim()) + .filter(s => s.length > 0); + if (segments.length === 0) return undefined; + + const filters: OnlineEvalFilter[] = []; + for (const segment of segments) { + // Match: <key> <operator> <value> where <value> is either a double-quoted + // string (with no embedded quotes) or a bare token sequence to end of line. 
+ const match = segment.match(/^(\S+)\s+(\S+)\s+(?:"([^"]*)"|(.+?))\s*$/); + if (!match) return undefined; + const key = match[1]!; + const operator = match[2] as OnlineEvalFilterOperator; + if (!ONLINE_EVAL_FILTER_OPERATORS.includes(operator)) return undefined; + const quoted = match[3]; + const bare = match[4]; + + let value: OnlineEvalFilter['value']; + if (quoted !== undefined) { + // Explicit string — preserves "true", "false", "12345" as strings. + value = { stringValue: quoted }; + } else { + const rawValue = bare!; + if (rawValue === 'true' || rawValue === 'false') { + value = { booleanValue: rawValue === 'true' }; + } else if (/^-?\d+(\.\d+)?$/.test(rawValue)) { + value = { doubleValue: parseFloat(rawValue) }; + } else { + value = { stringValue: rawValue }; + } + } + filters.push({ key, operator, value }); + } + return filters; +} diff --git a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts index e6e940948..e60b19955 100644 --- a/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts +++ b/src/schema/schemas/primitives/__tests__/online-eval-config.test.ts @@ -98,4 +98,137 @@ describe('OnlineEvalConfigSchema', () => { it('accepts config without description and enableOnCreate', () => { expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); }); + + describe('sessionTimeoutMinutes', () => { + it('accepts boundary value 1', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 1 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts boundary value 1440', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 1440 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('rejects 0', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 0 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects 1441', 
() => { + const config = { ...validConfig, sessionTimeoutMinutes: 1441 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects non-integer values', () => { + const config = { ...validConfig, sessionTimeoutMinutes: 5.5 }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('omitting the field is valid (uses construct default)', () => { + expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); + }); + }); + + describe('filters', () => { + const baseFilter = { + key: 'model', + operator: 'Equals' as const, + value: { stringValue: 'claude-3' }, + }; + + it('accepts a single valid filter with a stringValue', () => { + const config = { ...validConfig, filters: [baseFilter] }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts filters with doubleValue and booleanValue', () => { + const config = { + ...validConfig, + filters: [ + { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } }, + { key: 'success', operator: 'Equals', value: { booleanValue: true } }, + ], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts every supported operator', () => { + const operators = [ + 'Equals', + 'NotEquals', + 'GreaterThan', + 'LessThan', + 'GreaterThanOrEqual', + 'LessThanOrEqual', + 'Contains', + 'NotContains', + ] as const; + for (const op of operators) { + const config = { ...validConfig, filters: [{ ...baseFilter, operator: op }] }; + expect(OnlineEvalConfigSchema.safeParse(config).success, `operator ${op}`).toBe(true); + } + }); + + it('rejects an invalid operator', () => { + const config = { + ...validConfig, + filters: [{ ...baseFilter, operator: 'StartsWith' }], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with zero value variants set', () => { + const config = { + ...validConfig, + filters: [{ key: 'model', operator: 
'Equals', value: {} }], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with two value variants set simultaneously', () => { + const config = { + ...validConfig, + filters: [ + { + key: 'model', + operator: 'Equals', + value: { stringValue: 'claude-3', doubleValue: 1 }, + }, + ], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with all three value variants set', () => { + const config = { + ...validConfig, + filters: [ + { + key: 'model', + operator: 'Equals', + value: { stringValue: 'x', doubleValue: 1, booleanValue: true }, + }, + ], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('rejects a filter with an empty key', () => { + const config = { + ...validConfig, + filters: [{ ...baseFilter, key: '' }], + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(false); + }); + + it('omitting the field is valid', () => { + expect(OnlineEvalConfigSchema.safeParse(validConfig).success).toBe(true); + }); + + it('accepts an empty filters array', () => { + const config = { ...validConfig, filters: [] }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + }); }); From e9b92006c4ec2cdd195da01b8738ff4869db0176 Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 7 May 2026 19:05:39 +0000 Subject: [PATCH 5/6] fix: address reviewer feedback --- src/assets/cdk/package.json | 2 +- src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/assets/cdk/package.json b/src/assets/cdk/package.json index aa58892c2..0c6270f5f 100644 --- a/src/assets/cdk/package.json +++ b/src/assets/cdk/package.json @@ -23,7 +23,7 @@ "typescript": "~5.9.3" }, "dependencies": { - "@aws/agentcore-cdk": "^0.1.0-alpha.19", + "@aws/agentcore-cdk": "^0.1.0-alpha.29", "aws-cdk-lib": "^2.248.0", "constructs": "^10.0.0" } diff --git 
a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index 7c9a84f5b..768022e7e 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -281,7 +281,7 @@ export function AddOnlineEvalScreen({ { const trimmed = value.trim(); From 127b3e7604fe5a8c9c8ed620fd028a541aba65aa Mon Sep 17 00:00:00 2001 From: Agent Date: Thu, 7 May 2026 19:13:24 +0000 Subject: [PATCH 6/6] fix: address reviewer feedback --- src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap | 2 +- src/assets/cdk/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap index a97bbeb1d..f1c6b5cd2 100644 --- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap +++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap @@ -357,7 +357,7 @@ exports[`Assets Directory Snapshots > CDK assets > cdk/cdk/package.json should m "typescript": "~5.9.3" }, "dependencies": { - "@aws/agentcore-cdk": "^0.1.0-alpha.19", + "@aws/agentcore-cdk": "^0.1.0-alpha.28", "aws-cdk-lib": "^2.248.0", "constructs": "^10.0.0" } diff --git a/src/assets/cdk/package.json b/src/assets/cdk/package.json index 0c6270f5f..073aaa294 100644 --- a/src/assets/cdk/package.json +++ b/src/assets/cdk/package.json @@ -23,7 +23,7 @@ "typescript": "~5.9.3" }, "dependencies": { - "@aws/agentcore-cdk": "^0.1.0-alpha.29", + "@aws/agentcore-cdk": "^0.1.0-alpha.28", "aws-cdk-lib": "^2.248.0", "constructs": "^10.0.0" }