Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ exports[`Assets Directory Snapshots > CDK assets > cdk/cdk/package.json should m
"typescript": "~5.9.3"
},
"dependencies": {
"@aws/agentcore-cdk": "^0.1.0-alpha.19",
"@aws/agentcore-cdk": "^0.1.0-alpha.28",
"aws-cdk-lib": "^2.248.0",
"constructs": "^10.0.0"
}
Expand Down
2 changes: 1 addition & 1 deletion src/assets/cdk/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"typescript": "~5.9.3"
},
"dependencies": {
"@aws/agentcore-cdk": "^0.1.0-alpha.19",
"@aws/agentcore-cdk": "^0.1.0-alpha.28",
"aws-cdk-lib": "^2.248.0",
"constructs": "^10.0.0"
}
Expand Down
6 changes: 5 additions & 1 deletion src/cli/primitives/OnlineEvalConfigPrimitive.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { findConfigRoot } from '../../lib';
import type { OnlineEvalConfig } from '../../schema';
import type { OnlineEvalConfig, OnlineEvalFilter } from '../../schema';
import { OnlineEvalConfigSchema } from '../../schema';
import { getErrorMessage } from '../errors';
import type { RemovalPreview, RemovalResult, SchemaChange } from '../operations/remove/types';
Expand All @@ -16,6 +16,8 @@ export interface AddOnlineEvalConfigOptions {
samplingRate: number;
enableOnCreate?: boolean;
endpoint?: string;
sessionTimeoutMinutes?: number;
filters?: OnlineEvalFilter[];
}

export type RemovableOnlineEvalConfig = RemovableResource;
Expand Down Expand Up @@ -235,6 +237,8 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive<AddOnlineEvalConfig
samplingRate: options.samplingRate,
...(options.enableOnCreate !== undefined && { enableOnCreate: options.enableOnCreate }),
...(options.endpoint && { endpoint: options.endpoint }),
...(options.sessionTimeoutMinutes !== undefined && { sessionTimeoutMinutes: options.sessionTimeoutMinutes }),
...(options.filters && options.filters.length > 0 && { filters: options.filters }),
};

project.onlineEvalConfigs.push(config);
Expand Down
41 changes: 41 additions & 0 deletions src/cli/primitives/__tests__/OnlineEvalConfigPrimitive.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,47 @@ describe('OnlineEvalConfigPrimitive', () => {
expect(config.enableOnCreate).toBeUndefined();
});

// Optional sessionTimeoutMinutes and filters passed to add() must appear unchanged on the config handed to writeProjectSpec.
it('stores sessionTimeoutMinutes and filters when provided', async () => {
  mockReadProjectSpec.mockResolvedValue(makeProject());
  mockWriteProjectSpec.mockResolvedValue(undefined);

  const { success } = await primitive.add({
    name: 'WithTimeoutAndFilters',
    agent: 'MyAgent',
    evaluators: ['Builtin.GoalSuccessRate'],
    samplingRate: 10,
    sessionTimeoutMinutes: 30,
    filters: [
      { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } },
      { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } },
    ],
  });
  expect(success).toBe(true);

  // Inspect the first argument of the first write call: the project spec that was persisted.
  const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
  const savedConfig = writtenSpec.onlineEvalConfigs[0];
  expect(savedConfig.sessionTimeoutMinutes).toBe(30);
  expect(savedConfig.filters).toEqual([
    { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } },
    { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } },
  ]);
});

// When the caller supplies neither optional field, the written config must not carry them at all.
it('omits sessionTimeoutMinutes and filters when not provided', async () => {
  mockReadProjectSpec.mockResolvedValue(makeProject());
  mockWriteProjectSpec.mockResolvedValue(undefined);

  await primitive.add({
    name: 'NoTimeoutOrFilters',
    agent: 'MyAgent',
    evaluators: ['Builtin.GoalSuccessRate'],
    samplingRate: 10,
  });

  // Inspect the first argument of the first write call: the project spec that was persisted.
  const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0];
  const savedConfig = writtenSpec.onlineEvalConfigs[0];
  expect(savedConfig.sessionTimeoutMinutes).toBeUndefined();
  expect(savedConfig.filters).toBeUndefined();
});

it('supports multiple evaluators including ARNs', async () => {
mockReadProjectSpec.mockResolvedValue(makeProject());
mockWriteProjectSpec.mockResolvedValue(undefined);
Expand Down
3 changes: 3 additions & 0 deletions src/cli/tui/hooks/useCreateOnlineEval.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { onlineEvalConfigPrimitive } from '../../primitives/registry';
import type { OnlineEvalFilter } from '../../../schema';
import { withAddTelemetry } from '../../telemetry/cli-command-run.js';
import { useCallback, useEffect, useState } from 'react';

Expand All @@ -9,6 +10,7 @@ interface CreateOnlineEvalConfig {
evaluators: string[];
samplingRate: number;
sessionTimeoutMinutes?: number;
filters?: OnlineEvalFilter[];
enableOnCreate: boolean;
}

Expand All @@ -34,6 +36,7 @@ export function useCreateOnlineEval() {
evaluators: config.evaluators,
samplingRate: config.samplingRate,
...(config.sessionTimeoutMinutes !== undefined && { sessionTimeoutMinutes: config.sessionTimeoutMinutes }),
...(config.filters && config.filters.length > 0 && { filters: config.filters }),
enableOnCreate: config.enableOnCreate,
})
);
Expand Down
92 changes: 91 additions & 1 deletion src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,14 @@
import { HELP_TEXT } from '../../constants';
import { useListNavigation, useMultiSelectNavigation } from '../../hooks';
import { generateUniqueName } from '../../utils';
import { formatFilter, parseFiltersInput } from './filter-parser';
import type { AddOnlineEvalConfig, EvaluatorItem, RuntimeEndpointEntry } from './types';
import { DEFAULT_SAMPLING_RATE, ONLINE_EVAL_STEP_LABELS } from './types';
import {
DEFAULT_SAMPLING_RATE,
DEFAULT_SESSION_TIMEOUT_MINUTES,
ONLINE_EVAL_FILTER_OPERATORS,
ONLINE_EVAL_STEP_LABELS,
} from './types';
import { useAddOnlineEvalWizard } from './useAddOnlineEvalWizard';
import { Box, Text } from 'ink';
import React, { useCallback, useEffect, useMemo } from 'react';
Expand Down Expand Up @@ -99,6 +105,8 @@
const isEndpointStep = wizard.step === 'endpoint';
const isEvaluatorsStep = wizard.step === 'evaluators';
const isSamplingRateStep = wizard.step === 'samplingRate';
const isSessionTimeoutStep = wizard.step === 'sessionTimeout';
const isFiltersStep = wizard.step === 'filters';
const isEnableOnCreateStep = wizard.step === 'enableOnCreate';
const isConfirmStep = wizard.step === 'confirm';

Expand Down Expand Up @@ -230,6 +238,74 @@
</Box>
)}

{isSessionTimeoutStep && (
  <Box flexDirection="column">
    {/* Optional step: a blank answer passes undefined to the wizard; any other answer must be a whole number of minutes within 1–1440. */}
    <Text dimColor>
      Minutes of inactivity before an agent session is considered complete (1–1440). Leave blank to use the
      default of {DEFAULT_SESSION_TIMEOUT_MINUTES}.
    </Text>
    <TextInput
      key="sessionTimeout"
      prompt="Session timeout (minutes, blank=default)"
      initialValue=""
      onSubmit={value => {
        const trimmed = value.trim();
        if (trimmed === '') {
          // Blank input: record "no explicit value" rather than a number.
          wizard.setSessionTimeoutMinutes(undefined);
          return;
        }
        // Mirror customValidation so a malformed value is never stored.
        if (!/^[+-]?\d+$/.test(trimmed)) return;
        const minutes = Number(trimmed);
        if (minutes < 1 || minutes > 1440) return;
        wizard.setSessionTimeoutMinutes(minutes);
      }}
      onCancel={() => wizard.goBack()}
      customValidation={value => {
        const trimmed = value.trim();
        if (trimmed === '') return true;
        // parseInt would silently accept "30abc" (as 30) or "1.5" (as 1); require the
        // entire input to be an integer literal before converting it.
        if (!/^[+-]?\d+$/.test(trimmed)) return 'Must be an integer or blank';
        const minutes = Number(trimmed);
        if (minutes < 1 || minutes > 1440) return 'Must be between 1 and 1440';
        return true;
      }}
    />
  </Box>
)}

{isFiltersStep && (
  <Box flexDirection="column">
    {/* Optional step: a blank answer passes undefined to the wizard; otherwise the text must parse via parseFiltersInput. */}
    <Text dimColor>
      Optional filters that scope which traces are evaluated. Format: {'<key>'} {'<op>'} {'<value>'}, separated
      by {'";"'}. Operators: {ONLINE_EVAL_FILTER_OPERATORS.join(', ')}. Bare {'`true`'}/{'`false`'} → boolean; bare
      numbers → double; quoted {'`"..."`'} or anything else → string. Use quotes to force string type for
      numeric-looking IDs or the literal words {'`"true"`'}/{'`"false"`'}. Leave blank for no filters.
    </Text>
    <TextInput
      key="filters"
      prompt='Filters (e.g. model Equals claude-3; id Equals "12345"; success Equals true)'
      initialValue=""
      onSubmit={value => {
        const trimmed = value.trim();
        if (trimmed === '') {
          // Blank input: record "no filters" rather than an empty list.
          wizard.setFilters(undefined);
          return;
        }
        const parsed = parseFiltersInput(trimmed);
        // A falsy parse result means the input was rejected; do not advance with it.
        if (!parsed) return;
        wizard.setFilters(parsed);
      }}
      onCancel={() => wizard.goBack()}
      customValidation={value => {
        const trimmed = value.trim();
        if (trimmed === '') return true;
        const parsed = parseFiltersInput(trimmed);
        if (!parsed)
          return 'Each filter must be "<key> <operator> <value>" with a valid operator (separate with ";")';
        return true;
      }}
    />
  </Box>
)}

{isEnableOnCreateStep && (
<WizardSelect
title="Enable on deploy?"
Expand All @@ -247,6 +323,20 @@
...(effectiveConfig.endpoint ? [{ label: 'Endpoint', value: effectiveConfig.endpoint }] : []),
{ label: 'Evaluators', value: effectiveConfig.evaluators.join(', ') },
{ label: 'Sampling Rate', value: `${effectiveConfig.samplingRate}%` },
{
label: 'Session Timeout',
value:
effectiveConfig.sessionTimeoutMinutes !== undefined
? `${effectiveConfig.sessionTimeoutMinutes} min`
: `${DEFAULT_SESSION_TIMEOUT_MINUTES} min (default)`,
},
{
label: 'Filters',
value:
effectiveConfig.filters && effectiveConfig.filters.length > 0
? effectiveConfig.filters.map(formatFilter).join('; ')
: '(none)',
},
{ label: 'Enable on Deploy', value: effectiveConfig.enableOnCreate ? 'Yes' : 'No' },
]}
/>
Expand Down
138 changes: 138 additions & 0 deletions src/cli/tui/screens/online-eval/__tests__/filter-parser.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import { formatFilter, parseFiltersInput } from '../filter-parser';
import { describe, expect, it } from 'vitest';

// Covers parseFiltersInput: how raw values are typed, which operators are accepted,
// how ";"-separated segments are split, and which inputs are rejected with undefined.
describe('parseFiltersInput', () => {
  describe('value typing', () => {
    it('treats bare true/false as booleans', () => {
      expect(parseFiltersInput('success Equals true; failed Equals false')).toEqual([
        { key: 'success', operator: 'Equals', value: { booleanValue: true } },
        { key: 'failed', operator: 'Equals', value: { booleanValue: false } },
      ]);
    });

    it('treats bare integer values as doubles', () => {
      expect(parseFiltersInput('latencyMs LessThan 1000')).toEqual([
        { key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } },
      ]);
    });

    it('treats bare decimal values as doubles', () => {
      expect(parseFiltersInput('score GreaterThan -0.5')).toEqual([
        { key: 'score', operator: 'GreaterThan', value: { doubleValue: -0.5 } },
      ]);
    });

    it('treats bare non-numeric, non-boolean values as strings', () => {
      expect(parseFiltersInput('model Equals claude-3')).toEqual([
        { key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } },
      ]);
    });

    it('treats double-quoted values as strings even when they look numeric', () => {
      const parsed = parseFiltersInput('id Equals "12345"');
      expect(parsed).toEqual([{ key: 'id', operator: 'Equals', value: { stringValue: '12345' } }]);
    });

    it('treats double-quoted "true"/"false" as strings, not booleans', () => {
      const parsed = parseFiltersInput('flag Equals "true"');
      expect(parsed).toEqual([{ key: 'flag', operator: 'Equals', value: { stringValue: 'true' } }]);
    });

    it('treats quoted empty string as an empty stringValue', () => {
      const parsed = parseFiltersInput('note Equals ""');
      expect(parsed).toEqual([{ key: 'note', operator: 'Equals', value: { stringValue: '' } }]);
    });

    it('preserves multi-word bare values verbatim', () => {
      expect(parseFiltersInput('label Contains hello world')).toEqual([
        { key: 'label', operator: 'Contains', value: { stringValue: 'hello world' } },
      ]);
    });
  });

  describe('operator handling', () => {
    it('accepts every supported operator', () => {
      const supported = [
        'Equals',
        'NotEquals',
        'GreaterThan',
        'LessThan',
        'GreaterThanOrEqual',
        'LessThanOrEqual',
        'Contains',
        'NotContains',
      ];
      // The second argument to expect() labels a failure with the operator under test.
      supported.forEach(op => {
        expect(parseFiltersInput(`k ${op} v`), `operator ${op}`).toEqual([
          { key: 'k', operator: op, value: { stringValue: 'v' } },
        ]);
      });
    });

    it('returns undefined when the operator is unknown', () => {
      const parsed = parseFiltersInput('k FooBar v');
      expect(parsed).toBeUndefined();
    });

    it('is case-sensitive on operator names', () => {
      const parsed = parseFiltersInput('k equals v');
      expect(parsed).toBeUndefined();
    });
  });

  describe('segment splitting', () => {
    it('splits on ";" and trims each segment', () => {
      expect(parseFiltersInput('  a Equals 1  ;  b NotEquals 2  ')).toEqual([
        { key: 'a', operator: 'Equals', value: { doubleValue: 1 } },
        { key: 'b', operator: 'NotEquals', value: { doubleValue: 2 } },
      ]);
    });

    it('ignores empty segments produced by trailing or doubled ";"', () => {
      expect(parseFiltersInput('a Equals 1;;b Equals 2;')).toHaveLength(2);
    });

    it('returns undefined when input has no non-empty segments', () => {
      for (const blank of ['', '  ;;  ']) {
        expect(parseFiltersInput(blank)).toBeUndefined();
      }
    });
  });

  describe('failure modes', () => {
    it('returns undefined when a segment has fewer than three parts', () => {
      for (const tooShort of ['onlykey', 'key Equals']) {
        expect(parseFiltersInput(tooShort)).toBeUndefined();
      }
    });

    it('returns undefined when any segment in a list is malformed', () => {
      // The leading segment is valid; the trailing one lacks a value.
      const parsed = parseFiltersInput('a Equals 1; b Equals');
      expect(parsed).toBeUndefined();
    });
  });
});

// Covers formatFilter: each value variant renders as "<key> <operator> <value>".
describe('formatFilter', () => {
  it('renders string values', () => {
    const rendered = formatFilter({ key: 'model', operator: 'Equals', value: { stringValue: 'claude-3' } });
    expect(rendered).toBe('model Equals claude-3');
  });

  it('renders double values', () => {
    const rendered = formatFilter({ key: 'latencyMs', operator: 'LessThan', value: { doubleValue: 1000 } });
    expect(rendered).toBe('latencyMs LessThan 1000');
  });

  it('renders boolean values', () => {
    const rendered = formatFilter({ key: 'success', operator: 'Equals', value: { booleanValue: true } });
    expect(rendered).toBe('success Equals true');
  });
});
Loading
Loading