Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions e2e-tests/harness-aws-skills.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { createHarnessE2ESuite } from './harness-e2e-helper.js';

// End-to-end coverage for AWS Skills: create → add skill --aws-skills → deploy → invoke → verify the
// deployed agent reports its skills → teardown. Memory is disabled to keep the deploy fast (no
// managed-memory provisioning); memory behaviour belongs to the managed-memory e2e suite, not this
// one. The 'loads AWS skills' step asks the agent which skills it has and asserts the answer
// references the AWS ones, exercising the spec → CFN → runtime path rather than only flag parsing.
createHarnessE2ESuite({
  // Distinguishes this suite from other Bedrock suites in the test output.
  labelSuffix: 'aws-skills',
  modelProvider: 'bedrock',
  // Skills are the subject under test, so skip memory entirely.
  skipMemory: true,
  // Path filter handed to `add skill --aws-skills`.
  awsSkills: 'core-skills/*',
});
131 changes: 127 additions & 4 deletions e2e-tests/harness-e2e-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,36 @@ interface HarnessE2EConfig {
modelProvider: 'bedrock' | 'open_ai' | 'gemini';
/** Env var holding the API key ARN — its value is passed as --api-key-arn. */
apiKeyArnEnvVar?: string;
/** Skip memory entirely (`create --no-harness-memory` → disabled). Mutually exclusive with memoryRoundTrip. */
skipMemory?: boolean;
skipInvoke?: boolean;
/**
* After creating the (managed-memory) harness, exercise a 2-turn memory round-trip: state a fact,
* then in the SAME session confirm it's recalled. Proves the execution role's memory data-plane
* grant actually works at runtime (the AccessDenied-on-ListEvents scenario) — not just that
* invoke returns 200. Only meaningful when the harness has managed (or omitted) memory.
*/
memoryRoundTrip?: boolean;
/**
* AWS Skills to attach to the harness via `add skill --aws-skills <value>` before deploy. Use ''
* for all skills, or a comma-separated path filter (e.g. 'core-skills/*'). When set, an extra
* step asks the deployed agent what skills it has and asserts the response references them.
*/
awsSkills?: string;
/** Suffix for the suite label, so multiple suites with the same provider don't collide in output. */
labelSuffix?: string;
}

export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
const hasRequiredVar = !cfg.apiKeyArnEnvVar || !!process.env[cfg.apiKeyArnEnvVar];
const canRun = baseCanRun && hasRequiredVar;

const providerLabel =
cfg.modelProvider === 'open_ai' ? 'OpenAI' : cfg.modelProvider === 'gemini' ? 'Gemini' : 'Bedrock';
(cfg.modelProvider === 'open_ai' ? 'OpenAI' : cfg.modelProvider === 'gemini' ? 'Gemini' : 'Bedrock') +
(cfg.labelSuffix ? `/${cfg.labelSuffix}` : '');

// note: this is created outside of beforeAll since beforeAll is skipped if all tests are skipped.
const logger = getLogger(`harness-${providerLabel.toLowerCase()}`);
const logger = getLogger(`harness-${providerLabel.toLowerCase().replace('/', '-')}`);
if (!canRun) {
logger.warn(
`tests are skipped due to insufficient conditions. ` +
Expand Down Expand Up @@ -74,6 +91,16 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
const json = parseJsonOutput(result.stdout) as { projectPath: string };
projectPath = json.projectPath;

// Optionally attach AWS Skills before deploying. An empty string selects every skill (the flag is
// passed bare); any other value is forwarded as the comma-separated path filter for --aws-skills.
if (cfg.awsSkills !== undefined) {
  const pathFilter = cfg.awsSkills ? [cfg.awsSkills] : [];
  const addSkill = await runAgentCoreCLI(
    ['add', 'skill', '--harness', harnessName, '--aws-skills', ...pathFilter, '--json'],
    projectPath
  );
  expect(addSkill.exitCode, `add skill --aws-skills failed: ${addSkill.stderr}`).toBe(0);
}

await writeAwsTargets(projectPath);
installCdkTarball(projectPath);
}, 300000);
Expand All @@ -91,6 +118,9 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
async () => {
expect(projectPath, 'Project should have been created').toBeTruthy();

// Retry the deploy: managed-memory provisioning (3-5 min) can run long, and CFN/global-setup
// contention occasionally surfaces a transient non-zero on the first attempt. `deploy` is
// idempotent (re-applies the same stack), so a second attempt is safe and lands the stack.
await retry(
async () => {
const result = await runAgentCoreCLI(['deploy', '--yes', '--json'], projectPath);
Expand All @@ -100,11 +130,11 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
const json = parseJsonOutput(result.stdout) as { success: boolean };
expect(json.success, 'Deploy should report success').toBe(true);
},
1,
2,
30000
);
},
600000
900000
);

it.skipIf(!canRun || !!cfg.skipInvoke)(
Expand All @@ -131,6 +161,99 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
180000
);

// Managed-memory round-trip. A successful invoke alone is not sufficient evidence that the
// execution role's memory data-plane grant works at runtime: the SECOND turn has to recall what the
// FIRST turn stated, which requires the harness to read/write its managed memory without
// AccessDenied (the bug #286/#287 fixed).
it.skipIf(!canRun || !cfg.memoryRoundTrip)(
  'remembers a fact across turns (managed memory round-trip)',
  async () => {
    type InvokeOutput = { success: boolean; response?: string };

    // The service requires session ids of 33+ chars, hence the fixed prefix and padding. No
    // random/Date component is needed: harnessName already carries a per-run timestamp suffix,
    // which is enough to isolate runs from each other.
    const sessionId = `e2e-mem-roundtrip-${harnessName}-padding`;

    // Both turns differ only in the prompt; everything else must target the same session.
    const invokeInSession = (prompt: string) =>
      runAgentCoreCLI(
        ['invoke', '--harness', harnessName, '--session-id', sessionId, '--prompt', prompt, '--json'],
        projectPath
      );

    // Turn 1: state the fact.
    const firstTurn = await invokeInSession('My favorite color is teal. Remember it.');
    expect(firstTurn.exitCode, `Turn 1 failed: stderr=${firstTurn.stderr}, stdout=${firstTurn.stdout}`).toBe(0);
    const firstOutput = parseJsonOutput(firstTurn.stdout) as InvokeOutput;
    expect(firstOutput.success, 'Turn 1 invoke should succeed (no AccessDenied on memory)').toBe(true);

    // Turn 2: a fresh invoke on the same session must be able to read the fact back from memory.
    const recalled = await retry(
      async () => {
        const secondTurn = await invokeInSession('What is my favorite color? Answer with just the color.');
        expect(
          secondTurn.exitCode,
          `Turn 2 failed: stderr=${secondTurn.stderr}, stdout=${secondTurn.stdout}`
        ).toBe(0);
        const secondOutput = parseJsonOutput(secondTurn.stdout) as InvokeOutput;
        expect(secondOutput.success, 'Turn 2 invoke should succeed').toBe(true);
        const answer = (secondOutput.response ?? '').toLowerCase();
        expect(answer, `Harness should recall "teal" from memory; got: ${secondOutput.response}`).toContain('teal');
        return secondOutput;
      },
      3,
      15000
    );
    expect(recalled.success).toBe(true);
  },
  240000
);

// AWS Skills runtime check: the spec carrying the skills is not enough, the deployed agent has to
// report them. The agent is asked which skills/tools it has and the reply must mention AWS.
// NOTE(review): the prompt itself contains "AWS", so a reply such as "I have no AWS skills" would
// also satisfy the 'aws' substring check — consider asserting on a concrete skill name once the
// names served by the configured filter are known.
it.skipIf(!canRun || cfg.awsSkills === undefined)(
  'loads AWS skills on the deployed harness',
  async () => {
    const skillsPrompt = 'List the AWS skills or tools you have access to. Be brief.';

    const askForSkills = async () => {
      const invocation = await runAgentCoreCLI(
        ['invoke', '--harness', harnessName, '--prompt', skillsPrompt, '--json'],
        projectPath
      );
      expect(
        invocation.exitCode,
        `Skills invoke failed: stderr=${invocation.stderr}, stdout=${invocation.stdout}`
      ).toBe(0);

      const output = parseJsonOutput(invocation.stdout) as { success: boolean; response?: string };
      expect(output.success, 'Skills invoke should succeed').toBe(true);

      const reply = (output.response ?? '').toLowerCase();
      expect(reply, `Agent should reference its loaded AWS skills; got: ${output.response}`).toContain('aws');
    };

    // Model output varies between calls, so the check is wrapped in the shared retry helper.
    await retry(askForSkills, 3, 15000);
  },
  240000
);

it.skipIf(!canRun)(
'status shows the deployed harness',
async () => {
Expand Down
8 changes: 8 additions & 0 deletions e2e-tests/harness-managed-memory.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { createHarnessE2ESuite } from './harness-e2e-helper.js';

// End-to-end coverage for managed memory (the default mode): create → deploy (provisions a
// dedicated AgentCore Memory) → invoke → memory round-trip → teardown. The round-trip step is the
// load-bearing assertion: it shows the harness execution role can read/write its managed memory at
// runtime without AccessDenied on bedrock-agentcore:ListEvents — the regression that #286/#287
// fixed and that the ungating must not reintroduce.
createHarnessE2ESuite({
  labelSuffix: 'managed-memory',
  modelProvider: 'bedrock',
  memoryRoundTrip: true,
});
88 changes: 62 additions & 26 deletions integ-tests/add-remove-harness.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,15 @@ describe('integration: harness add/remove lifecycle', () => {
expect(await exists(promptPath), 'system-prompt.md should exist').toBe(true);
});

it('auto-creates memory resource', async () => {
it('defaults to managed memory and creates NO sibling memory resource', async () => {
// The harness owns its memory internally (managed mode). It must NOT auto-create a
// `${name}Memory` sibling in the project (the legacy pre-managed-memory behavior).
const spec = await readHarnessSpec(project.projectPath, harnessName);
expect(spec.memory?.mode, 'default harness memory should be managed').toBe('managed');

const config = await readProjectConfig(project.projectPath);
const memories = config.memories ?? [];
expect(memories.length, 'Should have auto-created memory').toBeGreaterThan(0);
const sibling = (config.memories ?? []).find((m: { name: string }) => m.name === `${harnessName}Memory`);
expect(sibling, 'no `${name}Memory` sibling should be auto-created').toBeFalsy();
});

it('rejects duplicate harness name', async () => {
Expand All @@ -72,12 +77,9 @@ describe('integration: harness add/remove lifecycle', () => {
const config = await readProjectConfig(project.projectPath);
const found = config.harnesses?.find((h: { name: string }) => h.name === harnessName);
expect(found, `Harness "${harnessName}" should be removed`).toBeFalsy();

const associatedMemory = (config.memories ?? []).find((m: { name: string }) => m.name === `${harnessName}Memory`);
expect(associatedMemory, 'Associated memory should be removed with harness').toBeFalsy();
});

it('re-adds harness after removal without duplicate memory error', async () => {
it('re-adds harness after removal', async () => {
const result = await runCLI(['add', 'harness', '--name', harnessName, '--json'], project.projectPath);

expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
Expand Down Expand Up @@ -236,7 +238,6 @@ describe('integration: create project with harness', () => {

describe('integration: harness config shape', () => {
let project: TestProject;
const gatedEnv = { ENABLE_GATED_FEATURES: '1' };

beforeAll(async () => {
project = await createTestProject({ noAgent: true });
Expand Down Expand Up @@ -420,9 +421,34 @@ describe('integration: harness config shape', () => {
expect(git.path).toBe('pack');
expect(git.auth).toEqual({ credentialName: 'gitCred', username: 'git-user' });
});

it('adds AWS skills — all paths and a path filter', async () => {
  // Minimal shape of a skill entry in the harness spec, as far as this test inspects it.
  type SkillEntry = { awsSkills?: { paths?: string[] } };
  const harness = 'AwsSkillHarness';

  // Prerequisite: the harness the skills are attached to. Assert it was created so a setup failure
  // is reported here with full CLI output instead of surfacing as a confusing `add skill` error.
  const created = await runCLI(['add', 'harness', '--name', harness, '--no-memory', '--json'], project.projectPath);
  expect(created.exitCode, `stdout: ${created.stdout}, stderr: ${created.stderr}`).toBe(0);

  // Bare --aws-skills: attach all skills.
  const all = await runCLI(['add', 'skill', '--harness', harness, '--aws-skills', '--json'], project.projectPath);
  expect(all.exitCode, `stdout: ${all.stdout}, stderr: ${all.stderr}`).toBe(0);

  // --aws-skills with a value: attach only the skills matching the path filter.
  const filtered = await runCLI(
    ['add', 'skill', '--harness', harness, '--aws-skills', 'core-skills/*', '--json'],
    project.projectPath
  );
  expect(filtered.exitCode, `stdout: ${filtered.stdout}, stderr: ${filtered.stderr}`).toBe(0);

  const spec = await readHarnessSpec(project.projectPath, harness);
  // "all" → awsSkills with no paths key; filtered → awsSkills.paths includes the glob.
  expect(spec.skills.some((s: SkillEntry) => s.awsSkills && !s.awsSkills.paths)).toBe(true);
  expect(spec.skills.some((s: SkillEntry) => s.awsSkills?.paths?.includes('core-skills/*'))).toBe(true);
});
});

describe('memory modes (gated)', () => {
describe('memory modes', () => {
it('adds a managed-memory harness with explicit strategies', async () => {
const name = 'ManagedMemHarness';
const result = await runCLI(
Expand All @@ -437,8 +463,7 @@ describe('integration: harness config shape', () => {
'SEMANTIC,EPISODIC',
'--json',
],
project.projectPath,
{ env: gatedEnv }
project.projectPath
);

expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
Expand All @@ -447,6 +472,27 @@ describe('integration: harness config shape', () => {
expect(spec.memory.strategies).toEqual(['SEMANTIC', 'EPISODIC']);
});

it('defaults to managed memory when no memory flags are passed', async () => {
  // Add a harness with only the mandatory --name flag; no memory option is supplied.
  const harness = 'DefaultMemHarness';
  const outcome = await runCLI(['add', 'harness', '--name', harness, '--json'], project.projectPath);
  expect(outcome.exitCode, `stdout: ${outcome.stdout}, stderr: ${outcome.stderr}`).toBe(0);

  // The written spec must have fallen back to the managed memory mode.
  const { memory } = await readHarnessSpec(project.projectPath, harness);
  expect(memory.mode).toBe('managed');
});

it('writes disabled memory for --memory-mode disabled (true opt-out, no sibling)', async () => {
  // Explicitly opt out of memory through the --memory-mode flag.
  const harness = 'DisabledMemHarness';
  const addArgs = ['add', 'harness', '--name', harness, '--memory-mode', 'disabled', '--json'];
  const outcome = await runCLI(addArgs, project.projectPath);
  expect(outcome.exitCode, `stdout: ${outcome.stdout}, stderr: ${outcome.stderr}`).toBe(0);

  // The opt-out must be recorded verbatim in the harness spec.
  const { memory } = await readHarnessSpec(project.projectPath, harness);
  expect(memory.mode).toBe('disabled');
});

it('adds an existing-memory harness referencing a sibling by name with tuning', async () => {
const name = 'ExistingMemHarness';
const result = await runCLI(
Expand All @@ -465,8 +511,7 @@ describe('integration: harness config shape', () => {
'5',
'--json',
],
project.projectPath,
{ env: gatedEnv }
project.projectPath
);

expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
Expand All @@ -481,26 +526,17 @@ describe('integration: harness config shape', () => {
{
label: 'retrievalConfig tuning when memory is referenced by ARN',
args: ['--memory-mode', 'existing', '--memory-arn', MEMORY_ARN, '--memory-top-k', '5'],
env: gatedEnv,
},
{
label: '--memory-mode existing without a memory reference',
args: ['--memory-mode', 'existing'],
env: gatedEnv,
},
{
label: '--memory-mode when the gated feature is disabled',
// Force the gate OFF explicitly: cleanSpawnEnv inherits the host process.env, so a
// developer/CI shell with ENABLE_GATED_FEATURES=1 exported would otherwise flip this
// case (the CLI would accept --memory-mode and exit 0). An empty string is "off"
// because isGatedFeaturesEnabled() checks `=== '1'`.
args: ['--memory-mode', 'managed'],
env: { ENABLE_GATED_FEATURES: '' },
label: '--no-memory combined with managed-only flags (--memory-strategies)',
args: ['--no-memory', '--memory-strategies', 'SEMANTIC'],
},
])('rejects $label', async ({ args, env }) => {
const result = await runCLI(['add', 'harness', '--name', 'BadMem', ...args, '--json'], project.projectPath, {
env,
});
])('rejects $label', async ({ args }) => {
const result = await runCLI(['add', 'harness', '--name', 'BadMem', ...args, '--json'], project.projectPath);
expect(result.exitCode).not.toBe(0);
});
});
Expand Down
Loading
Loading