aws · aidandaly24 · Jun 23, 2026
diff --git a/e2e-tests/harness-aws-skills.test.ts b/e2e-tests/harness-aws-skills.test.ts
@@ -0,0 +1,13 @@
+import { createHarnessE2ESuite } from './harness-e2e-helper.js';
+
+// AWS Skills deployed end-to-end: create → add skill --aws-skills → deploy → invoke → confirm the
+// deployed agent actually loaded the skills → teardown. Uses disabled memory so the deploy is fast
+// (no managed-memory provisioning) — this suite is about skills; memory is covered by the separate
+// managed-memory e2e suite. The 'loads AWS skills' step asks the agent for its skills and asserts it references
+// the AWS ones, proving the spec → CFN → runtime path works, not just that the flag parses.
+createHarnessE2ESuite({
+  modelProvider: 'bedrock',
+  awsSkills: 'core-skills/*',
+  skipMemory: true,
+  labelSuffix: 'aws-skills',
+});
diff --git a/e2e-tests/harness-e2e-helper.ts b/e2e-tests/harness-e2e-helper.ts
@@ -15,19 +15,36 @@ interface HarnessE2EConfig {
   modelProvider: 'bedrock' | 'open_ai' | 'gemini';
   /** Env var holding the API key ARN — its value is passed as --api-key-arn. */
   apiKeyArnEnvVar?: string;
+  /** Skip memory entirely (`create --no-harness-memory` → disabled). Mutually exclusive with memoryRoundTrip. */
   skipMemory?: boolean;
   skipInvoke?: boolean;
+  /**
+   * After creating the (managed-memory) harness, exercise a 2-turn memory round-trip: state a fact,
+   * then in the SAME session confirm it's recalled. Proves the execution role's memory data-plane
+   * grant actually works at runtime (the AccessDenied-on-ListEvents scenario) — not just that
+   * invoke returns 200. Only meaningful when the harness has managed (or omitted) memory.
+   */
+  memoryRoundTrip?: boolean;
+  /**
+   * AWS Skills to attach to the harness via `add skill --aws-skills <value>` before deploy. Use ''
+   * for all skills, or a comma-separated path filter (e.g. 'core-skills/*'). When set, an extra
+   * step asks the deployed agent what skills it has and asserts the response references them.
+   */
+  awsSkills?: string;
+  /** Suffix for the suite label, so multiple suites with the same provider don't collide in output. */
+  labelSuffix?: string;
 }
 
 export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
   const hasRequiredVar = !cfg.apiKeyArnEnvVar || !!process.env[cfg.apiKeyArnEnvVar];
   const canRun = baseCanRun && hasRequiredVar;
 
   const providerLabel =
-    cfg.modelProvider === 'open_ai' ? 'OpenAI' : cfg.modelProvider === 'gemini' ? 'Gemini' : 'Bedrock';
+    (cfg.modelProvider === 'open_ai' ? 'OpenAI' : cfg.modelProvider === 'gemini' ? 'Gemini' : 'Bedrock') +
+    (cfg.labelSuffix ? `/${cfg.labelSuffix}` : '');
 
   // note: this is created outside of beforeAll since beforeAll is skipped if all tests are skipped.
-  const logger = getLogger(`harness-${providerLabel.toLowerCase()}`);
+  const logger = getLogger(`harness-${providerLabel.toLowerCase().replace('/', '-')}`);
   if (!canRun) {
     logger.warn(
       `tests are skipped due to insufficient conditions. ` +
@@ -74,6 +91,16 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
       const json = parseJsonOutput(result.stdout) as { projectPath: string };
       projectPath = json.projectPath;
 
+      // Attach AWS Skills to the harness before deploy, if configured. '' means all skills; a
+      // non-empty value is a comma-separated path filter passed as the --aws-skills argument.
+      if (cfg.awsSkills !== undefined) {
+        const skillArgs = ['add', 'skill', '--harness', harnessName, '--aws-skills'];
+        if (cfg.awsSkills) skillArgs.push(cfg.awsSkills);
+        skillArgs.push('--json');
+        const skillResult = await runAgentCoreCLI(skillArgs, projectPath);
+        expect(skillResult.exitCode, `add skill --aws-skills failed: ${skillResult.stderr}`).toBe(0);
+      }
+
       await writeAwsTargets(projectPath);
       installCdkTarball(projectPath);
     }, 300000);
@@ -91,6 +118,9 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
       async () => {
         expect(projectPath, 'Project should have been created').toBeTruthy();
 
+        // Retry the deploy: managed-memory provisioning (3-5 min) can run long, and CFN/global-setup
+        // contention occasionally surfaces a transient non-zero on the first attempt. `deploy` is
+        // idempotent (re-applies the same stack), so a second attempt is safe and lands the stack.
         await retry(
           async () => {
             const result = await runAgentCoreCLI(['deploy', '--yes', '--json'], projectPath);
@@ -100,11 +130,11 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
             const json = parseJsonOutput(result.stdout) as { success: boolean };
             expect(json.success, 'Deploy should report success').toBe(true);
           },
-          1,
+          2,
           30000
         );
       },
-      600000
+      900000
     );
 
     it.skipIf(!canRun || !!cfg.skipInvoke)(
@@ -131,6 +161,99 @@ export function createHarnessE2ESuite(cfg: HarnessE2EConfig) {
       180000
     );
 
+    // Memory round-trip: prove the execution-role memory data-plane grant works at runtime. The
+    // harness must read/write its managed memory without AccessDenied (the bug #286/#287 fixed) —
+    // an invoke that returns 200 isn't enough; the SECOND turn must recall the fact from the FIRST.
+    it.skipIf(!canRun || !cfg.memoryRoundTrip)(
+      'remembers a fact across turns (managed memory round-trip)',
+      async () => {
+        // Session ids must be at least 33 chars (service constraint). harnessName already carries a
+        // per-run timestamp suffix, which isolates runs; padEnd only guards the minimum length.
+        const sessionId = `e2e-mem-roundtrip-${harnessName}-padding`.padEnd(33, '0');
+
+        const turn1 = await runAgentCoreCLI(
+          [
+            'invoke',
+            '--harness',
+            harnessName,
+            '--session-id',
+            sessionId,
+            '--prompt',
+            'My favorite color is teal. Remember it.',
+            '--json',
+          ],
+          projectPath
+        );
+        expect(turn1.exitCode, `Turn 1 failed: stderr=${turn1.stderr}, stdout=${turn1.stdout}`).toBe(0);
+        const t1 = parseJsonOutput(turn1.stdout) as { success: boolean; response?: string };
+        expect(t1.success, 'Turn 1 invoke should succeed (no AccessDenied on memory)').toBe(true);
+
+        // Recall in a fresh invoke on the same session — the harness must read its memory store.
+        const turn2 = await retry(
+          async () => {
+            const result = await runAgentCoreCLI(
+              [
+                'invoke',
+                '--harness',
+                harnessName,
+                '--session-id',
+                sessionId,
+                '--prompt',
+                'What is my favorite color? Answer with just the color.',
+                '--json',
+              ],
+              projectPath
+            );
+            expect(result.exitCode, `Turn 2 failed: stderr=${result.stderr}, stdout=${result.stdout}`).toBe(0);
+            const json = parseJsonOutput(result.stdout) as { success: boolean; response?: string };
+            expect(json.success, 'Turn 2 invoke should succeed').toBe(true);
+            expect(
+              (json.response ?? '').toLowerCase(),
+              `Harness should recall "teal" from memory; got: ${json.response}`
+            ).toContain('teal');
+            return json;
+          },
+          3,
+          15000
+        );
+        expect(turn2.success).toBe(true);
+      },
+      240000
+    );
+
+    // AWS Skills: prove the deployed agent actually loaded the configured skills (not just that the
+    // spec carried them). Ask the agent what skills it has and assert the answer references AWS ones.
+    it.skipIf(!canRun || cfg.awsSkills === undefined)(
+      'loads AWS skills on the deployed harness',
+      async () => {
+        // The prompt deliberately omits the word "AWS": the assertion below is a substring match on
+        // 'aws', so a prompt that names AWS lets a bare echo or a denial ("I have no AWS skills")
+        // pass even when nothing was loaded. With a neutral prompt, a match reflects the agent's
+        // own description of its skills rather than the wording of the question.
+        // NOTE(review): assumes the loaded skills' names or descriptions mention AWS — confirm.
+        const prompt = 'List the names of the skills you have loaded. Be brief.';
+
+        await retry(
+          async () => {
+            const result = await runAgentCoreCLI(
+              ['invoke', '--harness', harnessName, '--prompt', prompt, '--json'],
+              projectPath
+            );
+            expect(result.exitCode, `Skills invoke failed: stderr=${result.stderr}, stdout=${result.stdout}`).toBe(0);
+            const json = parseJsonOutput(result.stdout) as { success: boolean; response?: string };
+            expect(json.success, 'Skills invoke should succeed').toBe(true);
+            expect(
+              (json.response ?? '').toLowerCase(),
+              `Agent should reference its loaded AWS skills; got: ${json.response}`
+            ).toContain('aws');
+          },
+          3,
+          15000
+        );
+      },
+      240000
+    );
+
     it.skipIf(!canRun)(
       'status shows the deployed harness',
       async () => {

diff --git a/e2e-tests/harness-managed-memory.test.ts b/e2e-tests/harness-managed-memory.test.ts
@@ -0,0 +1,8 @@
+import { createHarnessE2ESuite } from './harness-e2e-helper.js';
+
+// Managed memory (the default) deployed end-to-end: create → deploy (provisions a dedicated
+// AgentCore Memory) → invoke → memory round-trip → teardown. The round-trip step is the load-bearing
+// assertion: it proves the harness execution role can read/write its managed memory at runtime
+// without AccessDenied on bedrock-agentcore:ListEvents — the regression #286/#287 fixed and that the
+// ungating must not reintroduce.
+createHarnessE2ESuite({ modelProvider: 'bedrock', memoryRoundTrip: true, labelSuffix: 'managed-memory' });
diff --git a/integ-tests/add-remove-harness.test.ts b/integ-tests/add-remove-harness.test.ts
@@ -51,10 +51,15 @@ describe('integration: harness add/remove lifecycle', () => {
     expect(await exists(promptPath), 'system-prompt.md should exist').toBe(true);
   });
 
-  it('auto-creates memory resource', async () => {
+  it('defaults to managed memory and creates NO sibling memory resource', async () => {
+    // The harness owns its memory internally (managed mode). It must NOT auto-create a
+    // `${name}Memory` sibling in the project (the legacy pre-managed-memory behavior).
+    const spec = await readHarnessSpec(project.projectPath, harnessName);
+    expect(spec.memory?.mode, 'default harness memory should be managed').toBe('managed');
+
     const config = await readProjectConfig(project.projectPath);
-    const memories = config.memories ?? [];
-    expect(memories.length, 'Should have auto-created memory').toBeGreaterThan(0);
+    const sibling = (config.memories ?? []).find((m: { name: string }) => m.name === `${harnessName}Memory`);
+    expect(sibling, 'no `${name}Memory` sibling should be auto-created').toBeFalsy();
   });
 
   it('rejects duplicate harness name', async () => {
@@ -72,12 +77,9 @@ describe('integration: harness add/remove lifecycle', () => {
     const config = await readProjectConfig(project.projectPath);
     const found = config.harnesses?.find((h: { name: string }) => h.name === harnessName);
     expect(found, `Harness "${harnessName}" should be removed`).toBeFalsy();
-
-    const associatedMemory = (config.memories ?? []).find((m: { name: string }) => m.name === `${harnessName}Memory`);
-    expect(associatedMemory, 'Associated memory should be removed with harness').toBeFalsy();
   });
 
-  it('re-adds harness after removal without duplicate memory error', async () => {
+  it('re-adds harness after removal', async () => {
     const result = await runCLI(['add', 'harness', '--name', harnessName, '--json'], project.projectPath);
 
     expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
@@ -236,7 +238,6 @@ describe('integration: create project with harness', () => {
 
 describe('integration: harness config shape', () => {
   let project: TestProject;
-  const gatedEnv = { ENABLE_GATED_FEATURES: '1' };
 
   beforeAll(async () => {
     project = await createTestProject({ noAgent: true });
@@ -420,9 +421,34 @@ describe('integration: harness config shape', () => {
       expect(git.path).toBe('pack');
       expect(git.auth).toEqual({ credentialName: 'gitCred', username: 'git-user' });
     });
+
+    it('adds AWS skills — all paths and a path filter', async () => {
+      const name = 'AwsSkillHarness';
+      const created = await runCLI(['add', 'harness', '--name', name, '--no-memory', '--json'], project.projectPath);
+      // Fail fast on setup so a broken `add harness` is not misreported as an `add skill` failure.
+      expect(created.exitCode, `stdout: ${created.stdout}, stderr: ${created.stderr}`).toBe(0);
+
+      const all = await runCLI(['add', 'skill', '--harness', name, '--aws-skills', '--json'], project.projectPath);
+      expect(all.exitCode, `stdout: ${all.stdout}, stderr: ${all.stderr}`).toBe(0);
+
+      const filtered = await runCLI(
+        ['add', 'skill', '--harness', name, '--aws-skills', 'core-skills/*', '--json'],
+        project.projectPath
+      );
+      expect(filtered.exitCode, `stdout: ${filtered.stdout}, stderr: ${filtered.stderr}`).toBe(0);
+
+      const spec = await readHarnessSpec(project.projectPath, name);
+      // "all" → awsSkills with no paths key; filtered → awsSkills.paths includes the glob.
+      expect(spec.skills.some((s: { awsSkills?: { paths?: string[] } }) => s.awsSkills && !s.awsSkills.paths)).toBe(
+        true
+      );
+      expect(
+        spec.skills.some((s: { awsSkills?: { paths?: string[] } }) => s.awsSkills?.paths?.includes('core-skills/*'))
+      ).toBe(true);
+    });
   });
 
-  describe('memory modes (gated)', () => {
+  describe('memory modes', () => {
     it('adds a managed-memory harness with explicit strategies', async () => {
       const name = 'ManagedMemHarness';
       const result = await runCLI(
@@ -437,8 +463,7 @@ describe('integration: harness config shape', () => {
           'SEMANTIC,EPISODIC',
           '--json',
         ],
-        project.projectPath,
-        { env: gatedEnv }
+        project.projectPath
       );
 
       expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
@@ -447,6 +472,27 @@ describe('integration: harness config shape', () => {
       expect(spec.memory.strategies).toEqual(['SEMANTIC', 'EPISODIC']);
     });
 
+    it('defaults to managed memory when no memory flags are passed', async () => {
+      // No memory flag is supplied; the spec written by `add harness` should still say "managed".
+      const harness = 'DefaultMemHarness';
+      const added = await runCLI(['add', 'harness', '--name', harness, '--json'], project.projectPath);
+      expect(added.exitCode, `stdout: ${added.stdout}, stderr: ${added.stderr}`).toBe(0);
+      const { memory } = await readHarnessSpec(project.projectPath, harness);
+      expect(memory.mode).toBe('managed');
+    });
+
+    it('writes disabled memory for --memory-mode disabled (true opt-out, no sibling)', async () => {
+      const name = 'DisabledMemHarness';
+      const args = ['add', 'harness', '--name', name, '--memory-mode', 'disabled', '--json'];
+      const result = await runCLI(args, project.projectPath);
+      expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
+      const spec = await readHarnessSpec(project.projectPath, name);
+      expect(spec.memory.mode).toBe('disabled');
+      // Back the title's "no sibling" claim: opting out must not register a `${name}Memory` resource.
+      const config = await readProjectConfig(project.projectPath);
+      expect((config.memories ?? []).some((m: { name: string }) => m.name === `${name}Memory`)).toBe(false);
+    });
+
     it('adds an existing-memory harness referencing a sibling by name with tuning', async () => {
       const name = 'ExistingMemHarness';
       const result = await runCLI(
@@ -465,8 +511,7 @@ describe('integration: harness config shape', () => {
           '5',
           '--json',
         ],
-        project.projectPath,
-        { env: gatedEnv }
+        project.projectPath
       );
 
       expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
@@ -481,26 +526,17 @@ describe('integration: harness config shape', () => {
       {
         label: 'retrievalConfig tuning when memory is referenced by ARN',
         args: ['--memory-mode', 'existing', '--memory-arn', MEMORY_ARN, '--memory-top-k', '5'],
-        env: gatedEnv,
       },
       {
         label: '--memory-mode existing without a memory reference',
         args: ['--memory-mode', 'existing'],
-        env: gatedEnv,
       },
       {
-        label: '--memory-mode when the gated feature is disabled',
-        // Force the gate OFF explicitly: cleanSpawnEnv inherits the host process.env, so a
-        // developer/CI shell with ENABLE_GATED_FEATURES=1 exported would otherwise flip this
-        // case (the CLI would accept --memory-mode and exit 0). An empty string is "off"
-        // because isGatedFeaturesEnabled() checks `=== '1'`.
-        args: ['--memory-mode', 'managed'],
-        env: { ENABLE_GATED_FEATURES: '' },
+        label: '--no-memory combined with managed-only flags (--memory-strategies)',
+        args: ['--no-memory', '--memory-strategies', 'SEMANTIC'],
       },
-    ])('rejects $label', async ({ args, env }) => {
-      const result = await runCLI(['add', 'harness', '--name', 'BadMem', ...args, '--json'], project.projectPath, {
-        env,
-      });
+    ])('rejects $label', async ({ args }) => {
+      const result = await runCLI(['add', 'harness', '--name', 'BadMem', ...args, '--json'], project.projectPath);
       expect(result.exitCode).not.toBe(0);
     });
   });